upgrade neural_speed to v0.3

microsoft · Mar 15, 2024 · 3df1f25 · 3df1f25
1 parent 7b44606
commit 3df1f25
Show file tree

Hide file tree

Showing 4 changed files with 9 additions and 9 deletions.
diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
@@ -206,7 +206,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "c11386eb632eec7c1c2aa323142f73519f946e2a",
+          "commitHash": "150e7527d5286ddd3a995c228dedf8d76a7a86bc",
           "repositoryUrl": "https://github.com/intel/neural-speed.git"
         },
         "comments": "neural_speed"

diff --git a/cmake/deps.txt b/cmake/deps.txt
@@ -35,7 +35,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
 microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
 mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
 mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
-neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/bestlav0.1.1.zip;65b0f7a0d04f72f0d5a8d48af70f0366f2ab3939
+neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/v0.3.zip;5ec64e3071edc7347ebd8a81679cf06e2bb9b851
 onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
 #use the commit of Final DDS removal. DDS output is now supported by ORT TRT.
 onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/bacfaaa951653cd4e72efe727a543567cb38f7de.zip;26434329612e804164ab7baa6ae629ada56c1b26

diff --git a/cmake/external/neural_speed.cmake b/cmake/external/neural_speed.cmake
@@ -9,7 +9,7 @@ if(USE_NEURAL_SPEED)
       neural_speed
       URL ${DEP_URL_neural_speed}
       URL_HASH SHA1=${DEP_SHA1_neural_speed}
-      PATCH_COMMAND ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/neural_speed/65b0f7a0d04f72f0d5a8d48af70f0366f2ab3939.patch
+      PATCH_COMMAND ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/neural_speed/150e7527d5286ddd3a995c228dedf8d76a7a86bc.patch
   )
   set(BTLA_USE_OPENMP OFF)
   onnxruntime_fetchcontent_makeavailable(neural_speed)

diff --git a/.../patches/neural_speed/65b0f7a0d04f72f0d5a8d48af70f0366f2ab3939.patch b/.../patches/neural_speed/150e7527d5286ddd3a995c228dedf8d76a7a86bc.patch
@@ -1,13 +1,13 @@
-diff --git a/bestla/bestla_prologue_b.h b/bestla/bestla_prologue_b.h
-index 28a7347..ce0880b 100644
---- a/bestla/bestla_prologue_b.h
-+++ b/bestla/bestla_prologue_b.h
-@@ -403,9 +403,8 @@ class WeightKBlockNInteger {
+diff --git a/bestla/bestla/bestla_prologue_b.h b/bestla/bestla/bestla_prologue_b.h
+index 99f3ccc..a11de9d 100644
+--- a/bestla/bestla/bestla_prologue_b.h
++++ b/bestla/bestla/bestla_prologue_b.h
+@@ -456,9 +456,8 @@ class WeightKBlockNInteger {
      auto tmpscales = tmp;
      auto tmpzeropoints = reinterpret_cast<int8_t*>(tmpscales + N * blks);
      if (scales) {
 -      for (size_t i = 0; i < N * blks; i += 2) {
-+      for (size_t i = 0; i < N * blks; i++) {
++      for (size_t i = 0; i < N * blks; i ++) {
          tmpscales[i] = scales[i] / 16;
 -        tmpscales[i + 1] = scales[i + 1] / 16;
        }