microsoft · snnn · Dec 13, 2023 · Dec 11, 2023 · Dec 12, 2023 · Dec 12, 2023
diff --git a/onnxruntime/contrib_ops/cpu/math/sparse_dense_matmul.cc b/onnxruntime/contrib_ops/cpu/math/sparse_dense_matmul.cc
@@ -64,7 +64,8 @@ inline void SparseDenseMatMulImpl(const ComputeCtx& ctx, const ConstSparseMatrix
 
 template <>
 inline void SparseDenseMatMulImpl<float>(const ComputeCtx& ctx, const ConstSparseMatrixMap<float>& map_A,
-                                         const ConstEigenMatrixMapRowMajor<float>& map_B, EigenMatrixMapRowMajor<float>& output_map) {
+                                         const ConstEigenMatrixMapRowMajor<float>& map_B,
+                                         EigenMatrixMapRowMajor<float>& output_map) {
   if (ctx.trans_A && ctx.trans_B) {
     output_map = map_A.transpose() * ctx.alpha * map_B.transpose();
   } else if (ctx.trans_A && !ctx.trans_B) {
@@ -84,13 +85,40 @@ struct SparseToDenseCsr {
     const auto& b_dims = B.Shape().GetDims();
     const auto& out_dims = output.Shape().GetDims();
     auto csr_view = A.AsCsr();
-
-    ConstSparseMatrixMap<T> map_A(a_dims[0], a_dims[1], A.NumValues(),
-                                  csr_view.Outer().Data<int64_t>(),
-                                  csr_view.Inner().Data<int64_t>(),
+    const Eigen::Index* inner_index_pointer = nullptr;
+    const Eigen::Index* outer_index_pointer = nullptr;
+    std::unique_ptr<Eigen::Index[]> buffer_holder_inner, buffer_holder_outer;
+    if constexpr (std::is_integral<Eigen::Index>::value &&
+                  std::is_signed<Eigen::Index>::value &&
+                  (sizeof(Eigen::Index) == sizeof(int64_t))) {
+      // On macOS the following reinterpret_cast is necessary because Eigen::Index is an alias of `long` but int64_t is
+      // `long long`. Though they have the same size, compilers still do not allow an implicit casting between them.
+      inner_index_pointer = reinterpret_cast<const Eigen::Index*>(csr_view.Inner().Data<int64_t>());
+      outer_index_pointer = reinterpret_cast<const Eigen::Index*>(csr_view.Outer().Data<int64_t>());
+    } else {
+      const size_t inner_size = narrow<size_t>(csr_view.Inner().Shape().Size());
+      const size_t outer_size = narrow<size_t>(csr_view.Outer().Shape().Size());
+      buffer_holder_inner.reset(new Eigen::Index[inner_size]);
+      buffer_holder_outer.reset(new Eigen::Index[outer_size]);
+      inner_index_pointer = buffer_holder_inner.get();
+      outer_index_pointer = buffer_holder_outer.get();
+      const int64_t* inner_begin = csr_view.Inner().Data<int64_t>();
+      const int64_t* outer_begin = csr_view.Outer().Data<int64_t>();
+      std::transform<const int64_t*, Eigen::Index*>(inner_begin, inner_begin + inner_size,
+                                                    buffer_holder_inner.get(), [](int64_t v) -> Eigen::Index {
+                                                      return narrow<Eigen::Index>(v);
+                                                    });
+      std::transform<const int64_t*, Eigen::Index*>(outer_begin, outer_begin + outer_size,
+                                                    buffer_holder_outer.get(), [](int64_t v) -> Eigen::Index {
+                                                      return narrow<Eigen::Index>(v);
+                                                    });
+    }
+    ConstSparseMatrixMap<T> map_A(narrow<Eigen::Index>(a_dims[0]), narrow<Eigen::Index>(a_dims[1]),
+                                  narrow<Eigen::Index>(A.NumValues()), outer_index_pointer, inner_index_pointer,
                                   A.Values().Data<T>());
-    ConstEigenMatrixMapRowMajor<T> map_B(B.Data<T>(), b_dims[0], b_dims[1]);
-    EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), out_dims[0], out_dims[1]);
+    ConstEigenMatrixMapRowMajor<T> map_B(B.Data<T>(), narrow<Eigen::Index>(b_dims[0]), narrow<Eigen::Index>(b_dims[1]));
+    EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<Eigen::Index>(out_dims[0]),
+                                         narrow<Eigen::Index>(out_dims[1]));
     // XXX: Consider re-writing it as a parallel loop as Eigen requires it to use OpenMP
     // XXX: Consider vectorization
     SparseDenseMatMulImpl(ctx, map_A, map_B, output_map);
@@ -121,9 +149,11 @@ struct SparseToDenseCoo {
     auto coo_view = A.AsCoo();
     const auto& ind_dims = coo_view.Indices().Shape().GetDims();
     ORT_RETURN_IF_NOT(ind_dims.size() == 2, "COO indices must be 2-D, got: ", ind_dims.size());
-    ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), narrow<size_t>(ind_dims[0]), narrow<size_t>(ind_dims[1]));
+    ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), narrow<size_t>(ind_dims[0]),
+                                                        narrow<size_t>(ind_dims[1]));
     ConstEigenMatrixMapRowMajor<T> map_b(B.Data<T>(), narrow<size_t>(b_dims[0]), narrow<size_t>(b_dims[1]));
-    EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<size_t>(out_dims[0]), narrow<size_t>(out_dims[1]));
+    EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<size_t>(out_dims[0]),
+                                         narrow<size_t>(out_dims[1]));
     output_map.setZero();
 
     const auto rhs_right = (ctx.trans_B) ? b_dims[0] : b_dims[1];
@@ -140,7 +170,8 @@ struct SparseToDenseCoo {
       ORT_RETURN_IF_NOT(m < out_left, "COO m index: ", m, " is out of bounds of out_left: ", out_left);
       const T a_value = a_values[i];
       for (int64_t n = 0; n < rhs_right; ++n) {
-        const T b_value = (ctx.trans_B) ? map_b(narrow<size_t>(n), narrow<size_t>(k)) : map_b(narrow<size_t>(k), narrow<size_t>(n));
+        const T b_value =
+            (ctx.trans_B) ? map_b(narrow<size_t>(n), narrow<size_t>(k)) : map_b(narrow<size_t>(k), narrow<size_t>(n));
         output_map(narrow<size_t>(m), narrow<size_t>(n)) += Mul(a_value, ctx.alpha, b_value);
       }
     }
@@ -170,8 +201,9 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
   const auto inner_B = (trans_b_attr_) ? b_dims[1] : b_dims[0];
   const auto outer_B = (trans_b_attr_) ? b_dims[0] : b_dims[1];
 
-  ORT_RETURN_IF_NOT(inner_A == inner_B, "Can not multiply A and B as inner dimension does not match. inner_A: ",
-                    inner_A, " vs inner_B: ", inner_B);
+  ORT_RETURN_IF_NOT(inner_A == inner_B,
+                    "Can not multiply A and B as inner dimension does not match. inner_A: ", inner_A,
+                    " vs inner_B: ", inner_B);
 
   TensorShape output_shape{outer_A, outer_B};
   auto* output = ctx->Output(0, output_shape);
@@ -184,7 +216,8 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
     auto coo_view = A->AsCoo();
     const auto num_dims = coo_view.Indices().Shape().NumDimensions();
     ORT_RETURN_IF_NOT(num_dims == 2, "Expecting COO 2-D indices shape");
-    ORT_RETURN_IF_NOT(A->Values().Shape().Size() * 2 == coo_view.Indices().Shape().Size(), "Expecting 2xValues == indices");
+    ORT_RETURN_IF_NOT(A->Values().Shape().Size() * 2 == coo_view.Indices().Shape().Size(),
+                      "Expecting 2xValues == indices");
     auto status = t_disp.InvokeRet<Status, SparseToDenseCoo>(compute_ctx, *A, *B, *output);
     ORT_RETURN_IF_ERROR(status);
 // Eigen has a bug in x86 where it calculates reallocation size as -1
@@ -211,4 +244,4 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
 }  // namespace contrib
 }  // namespace onnxruntime
 
-#endif  //! defined(DISABLE_SPARSE_TENSORS)
+#endif  //! defined(DISABLE_SPARSE_TENSORS)
diff --git a/onnxruntime/core/util/math_cpuonly.h b/onnxruntime/core/util/math_cpuonly.h
@@ -93,7 +93,7 @@ template <typename T>
 using ConstEigenMatrixMap = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;
 
 template <class T>
-using ConstSparseMatrixMap = Eigen::Map<const Eigen::SparseMatrix<T, Eigen::RowMajor, int64_t>>;
+using ConstSparseMatrixMap = Eigen::Map<const Eigen::SparseMatrix<T, Eigen::RowMajor, Eigen::Index>>;
 
 template <typename T>
 using ConstEigenArrayMap = Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;

diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -106,8 +106,7 @@ stages:
           ls $(Build.BinariesDirectory)/gccbin/bin
           mkdir $(Build.BinariesDirectory)/arm32build
           cd $(Build.BinariesDirectory)/arm32build
-          # TODO: fix the warnings and remove the --compile-no-warning-as-error arg
-          cmake --compile-no-warning-as-error $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm32_crosscompile_toolchain.cmake -G Ninja
+          cmake $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm32_crosscompile_toolchain.cmake -G Ninja
           ninja
           rm -rf $(Build.BinariesDirectory)/arm32build $(Build.BinariesDirectory)/gccbin
         displayName: Cross-compile for Linux ARM32 and ARM64