Fix a build warning in SparseTensor code for 32-bit build configs #18766

Merged · 6 commits · Dec 13, 2023
Changes from all commits
73 changes: 48 additions & 25 deletions onnxruntime/contrib_ops/cpu/math/sparse_dense_matmul.cc
@@ -47,7 +47,6 @@ struct ComputeCtx {
float alpha;
};

-#if !defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)
template <typename T>
inline void SparseDenseMatMulImpl(const ComputeCtx& ctx, const ConstSparseMatrixMap<T>& map_A,
const ConstEigenMatrixMapRowMajor<T>& map_B, EigenMatrixMapRowMajor<T>& output_map) {
@@ -64,7 +63,8 @@ inline void SparseDenseMatMulImpl(const ComputeCtx& ctx, const ConstSparseMatrix

template <>
inline void SparseDenseMatMulImpl<float>(const ComputeCtx& ctx, const ConstSparseMatrixMap<float>& map_A,
-const ConstEigenMatrixMapRowMajor<float>& map_B, EigenMatrixMapRowMajor<float>& output_map) {
+const ConstEigenMatrixMapRowMajor<float>& map_B,
+EigenMatrixMapRowMajor<float>& output_map) {
if (ctx.trans_A && ctx.trans_B) {
output_map = map_A.transpose() * ctx.alpha * map_B.transpose();
} else if (ctx.trans_A && !ctx.trans_B) {
@@ -84,21 +84,47 @@ struct SparseToDenseCsr {
const auto& b_dims = B.Shape().GetDims();
const auto& out_dims = output.Shape().GetDims();
auto csr_view = A.AsCsr();

-ConstSparseMatrixMap<T> map_A(a_dims[0], a_dims[1], A.NumValues(),
-csr_view.Outer().Data<int64_t>(),
-csr_view.Inner().Data<int64_t>(),
+const Eigen::Index* inner_index_pointer = nullptr;
+const Eigen::Index* outer_index_pointer = nullptr;
+// unique_ptrs that auto-release the converted index buffers when they are allocated.
+std::unique_ptr<Eigen::Index[]> buffer_holder_inner, buffer_holder_outer;
+if constexpr (std::is_integral<Eigen::Index>::value &&
+std::is_signed<Eigen::Index>::value &&
+(sizeof(Eigen::Index) == sizeof(int64_t))) {
+// On macOS the following reinterpret_cast is necessary because Eigen::Index is an alias of `long` but int64_t is
+// `long long`. Though they have the same size, compilers still do not allow an implicit cast between them.
+inner_index_pointer = reinterpret_cast<const Eigen::Index*>(csr_view.Inner().Data<int64_t>());
+outer_index_pointer = reinterpret_cast<const Eigen::Index*>(csr_view.Outer().Data<int64_t>());
+} else {
+// In a 32-bit build we need to narrow the following two index tensors to 32-bit values.
+gsl::span<const int64_t> inner_data = csr_view.Inner().DataAsSpan<int64_t>();
+gsl::span<const int64_t> outer_data = csr_view.Outer().DataAsSpan<int64_t>();
+buffer_holder_inner.reset(new Eigen::Index[inner_data.size()]);
+buffer_holder_outer.reset(new Eigen::Index[outer_data.size()]);
+inner_index_pointer = buffer_holder_inner.get();
+outer_index_pointer = buffer_holder_outer.get();

+std::transform(inner_data.begin(), inner_data.end(),
+buffer_holder_inner.get(), [](int64_t v) -> Eigen::Index {
+return narrow<Eigen::Index>(v);
+});
+std::transform(outer_data.begin(), outer_data.end(),
+buffer_holder_outer.get(), [](int64_t v) -> Eigen::Index {
+return narrow<Eigen::Index>(v);
+});
+}
+ConstSparseMatrixMap<T> map_A(narrow<Eigen::Index>(a_dims[0]), narrow<Eigen::Index>(a_dims[1]),
+narrow<Eigen::Index>(A.NumValues()), outer_index_pointer, inner_index_pointer,
A.Values().Data<T>());
-ConstEigenMatrixMapRowMajor<T> map_B(B.Data<T>(), b_dims[0], b_dims[1]);
-EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), out_dims[0], out_dims[1]);
+ConstEigenMatrixMapRowMajor<T> map_B(B.Data<T>(), narrow<Eigen::Index>(b_dims[0]), narrow<Eigen::Index>(b_dims[1]));
+EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<Eigen::Index>(out_dims[0]),
+narrow<Eigen::Index>(out_dims[1]));
// XXX: Consider re-writing it as a parallel loop as Eigen requires it to use OpenMP
// XXX: Consider vectorization
SparseDenseMatMulImpl(ctx, map_A, map_B, output_map);
}
};

-#endif //! defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)

template <typename T>
inline T Mul(T a_value, float, T b_value) {
return a_value * b_value;
@@ -121,9 +147,11 @@ struct SparseToDenseCoo {
auto coo_view = A.AsCoo();
const auto& ind_dims = coo_view.Indices().Shape().GetDims();
ORT_RETURN_IF_NOT(ind_dims.size() == 2, "COO indices must be 2-D, got: ", ind_dims.size());
-ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), narrow<size_t>(ind_dims[0]), narrow<size_t>(ind_dims[1]));
+ConstEigenMatrixMapRowMajor<int64_t> a_indicies_map(coo_view.Indices().Data<int64_t>(), narrow<size_t>(ind_dims[0]),
+narrow<size_t>(ind_dims[1]));
ConstEigenMatrixMapRowMajor<T> map_b(B.Data<T>(), narrow<size_t>(b_dims[0]), narrow<size_t>(b_dims[1]));
-EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<size_t>(out_dims[0]), narrow<size_t>(out_dims[1]));
+EigenMatrixMapRowMajor<T> output_map(output.MutableData<T>(), narrow<size_t>(out_dims[0]),
+narrow<size_t>(out_dims[1]));
output_map.setZero();

const auto rhs_right = (ctx.trans_B) ? b_dims[0] : b_dims[1];
@@ -140,7 +168,8 @@ struct SparseToDenseCoo {
ORT_RETURN_IF_NOT(m < out_left, "COO m index: ", m, " is out of bounds of out_left: ", out_left);
const T a_value = a_values[i];
for (int64_t n = 0; n < rhs_right; ++n) {
-const T b_value = (ctx.trans_B) ? map_b(narrow<size_t>(n), narrow<size_t>(k)) : map_b(narrow<size_t>(k), narrow<size_t>(n));
+const T b_value =
+(ctx.trans_B) ? map_b(narrow<size_t>(n), narrow<size_t>(k)) : map_b(narrow<size_t>(k), narrow<size_t>(n));
output_map(narrow<size_t>(m), narrow<size_t>(n)) += Mul(a_value, ctx.alpha, b_value);
}
}
@@ -170,8 +199,9 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
const auto inner_B = (trans_b_attr_) ? b_dims[1] : b_dims[0];
const auto outer_B = (trans_b_attr_) ? b_dims[0] : b_dims[1];

ORT_RETURN_IF_NOT(inner_A == inner_B, "Can not multiply A and B as inner dimension does not match. inner_A: ",
inner_A, " vs inner_B: ", inner_B);
ORT_RETURN_IF_NOT(inner_A == inner_B,
"Can not multiply A and B as inner dimension does not match. inner_A: ", inner_A,
" vs inner_B: ", inner_B);

TensorShape output_shape{outer_A, outer_B};
auto* output = ctx->Output(0, output_shape);
@@ -184,12 +214,10 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
auto coo_view = A->AsCoo();
const auto num_dims = coo_view.Indices().Shape().NumDimensions();
ORT_RETURN_IF_NOT(num_dims == 2, "Expecting COO 2-D indices shape");
-ORT_RETURN_IF_NOT(A->Values().Shape().Size() * 2 == coo_view.Indices().Shape().Size(), "Expecting 2xValues == indices");
+ORT_RETURN_IF_NOT(A->Values().Shape().Size() * 2 == coo_view.Indices().Shape().Size(),
+"Expecting 2xValues == indices");
auto status = t_disp.InvokeRet<Status, SparseToDenseCoo>(compute_ctx, *A, *B, *output);
ORT_RETURN_IF_ERROR(status);
-// Eigen has a bug in x86 where it calculates reallocation size as -1
-// and throws bad_alloc
-#if !defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)
} else if (A->Format() == SparseFormat::kCsrc) {
auto csr_view = A->AsCsr();
ORT_RETURN_IF_NOT(A->Values().Shape().Size() == csr_view.Inner().Shape().Size(),
@@ -199,16 +227,11 @@ Status SparseToDenseMatMul::Compute(OpKernelContext* ctx) const {
} else {
return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Currently support only COO and CSR(x64) formats");
}
-#else
-} else {
-return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "WASM and 32-bit builds support only COO format");
-}
-#endif //! defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)

return Status::OK();
}

} // namespace contrib
} // namespace onnxruntime

#endif //! defined(DISABLE_SPARSE_TENSORS)
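The compile-time dispatch above is the heart of the fix: when Eigen::Index is already a signed 64-bit integer, the int64_t index buffers are aliased directly; otherwise they are copied through a checked narrowing conversion. Below is a minimal standalone sketch of the same pattern — it uses gsl::narrow in place of onnxruntime's internal narrow helper (an assumption; the in-tree helper has its own error handling), and the function name AsEigenIndex is hypothetical.

```cpp
#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include <Eigen/Core>
#include <gsl/gsl>  // gsl::span, gsl::narrow (throws gsl::narrowing_error on lossy casts)

// Returns a pointer to indices usable by an Eigen sparse map. On typical
// 64-bit builds Eigen::Index is a signed 64-bit integer, so the int64_t
// buffer can be aliased directly; on 32-bit builds the values are copied
// into `holder` with a checked narrowing conversion.
inline const Eigen::Index* AsEigenIndex(gsl::span<const int64_t> src,
                                        std::unique_ptr<Eigen::Index[]>& holder) {
  if constexpr (std::is_integral_v<Eigen::Index> && std::is_signed_v<Eigen::Index> &&
                sizeof(Eigen::Index) == sizeof(int64_t)) {
    // Same size and signedness; the cast handles the `long` vs `long long`
    // distinction (they are distinct types even when both are 64 bits wide).
    return reinterpret_cast<const Eigen::Index*>(src.data());
  } else {
    // Narrower index type: materialize a converted copy, checking each value.
    holder = std::make_unique<Eigen::Index[]>(src.size());
    std::transform(src.begin(), src.end(), holder.get(),
                   [](int64_t v) { return gsl::narrow<Eigen::Index>(v); });
    return holder.get();
  }
}
```

On a typical 64-bit build the first branch is selected at compile time and no copy occurs; on a 32-bit or WASM build the else branch materializes a narrowed copy. That is what lets the CSR path compile without the old platform #if exclusions.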
2 changes: 1 addition & 1 deletion onnxruntime/core/util/math_cpuonly.h
@@ -93,7 +93,7 @@ template <typename T>
using ConstEigenMatrixMap = Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>;

template <class T>
-using ConstSparseMatrixMap = Eigen::Map<const Eigen::SparseMatrix<T, Eigen::RowMajor, int64_t>>;
+using ConstSparseMatrixMap = Eigen::Map<const Eigen::SparseMatrix<T, Eigen::RowMajor, Eigen::Index>>;

template <typename T>
using ConstEigenArrayMap = Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
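The one-line typedef change above is what makes the CSR map portable: by default Eigen::Index is an alias for std::ptrdiff_t (overridable via EIGEN_DEFAULT_DENSE_INDEX_TYPE), so keying the sparse-matrix map on Eigen::Index instead of a hard-coded int64_t gives the map a storage-index type that matches the platform word size. A small sketch of that assumption:

```cpp
#include <cstddef>
#include <type_traits>

#include <Eigen/Core>

// Holds for the default Eigen configuration, i.e. when
// EIGEN_DEFAULT_DENSE_INDEX_TYPE has not been overridden.
static_assert(std::is_same_v<Eigen::Index, std::ptrdiff_t>,
              "Eigen::Index defaults to std::ptrdiff_t");
// On mainstream ABIs this makes Eigen::Index 4 bytes wide on 32-bit
// targets and 8 bytes wide on 64-bit targets, matching the pointer size.
```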
2 changes: 0 additions & 2 deletions onnxruntime/test/contrib_ops/math/matmul_sparse_test.cc
@@ -140,7 +140,6 @@ void resize(Index size, double reserveSizeFactor = 0) {
}
*/
#if !defined(DISABLE_SPARSE_TENSORS)
-#if !defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)
TEST(SparseToDenseMatMul, TestCsr) {
constexpr int64_t rows = 9;
constexpr int64_t cols = 9;
@@ -261,7 +260,6 @@ TEST(SparseToDenseMatMul, TestCsr) {
tester.Run(OpTester::ExpectResult::kExpectSuccess);
}
}
-#endif // //!defined(__i386__) && !defined(_M_IX86) && !defined(__wasm__) && !defined(__ANDROID__)

TEST(SparseToDenseMatMul, TestCoo) {
constexpr int64_t rows = 9;
3 changes: 1 addition & 2 deletions tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -106,8 +106,7 @@ stages:
ls $(Build.BinariesDirectory)/gccbin/bin
mkdir $(Build.BinariesDirectory)/arm32build
cd $(Build.BinariesDirectory)/arm32build
-# TODO: fix the warnings and remove the --compile-no-warning-as-error arg
-cmake --compile-no-warning-as-error $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm32_crosscompile_toolchain.cmake -G Ninja
+cmake $(Build.SourcesDirectory)/cmake -Donnxruntime_ENABLE_CPUINFO=OFF -DPython_EXECUTABLE=/usr/bin/python3 -DPYTHON_EXECUTABLE=/usr/bin/python3 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_TOOLCHAIN_FILE=$(Build.SourcesDirectory)/cmake/linux_arm32_crosscompile_toolchain.cmake -G Ninja
ninja
rm -rf $(Build.BinariesDirectory)/arm32build $(Build.BinariesDirectory)/gccbin
displayName: Cross-compile for Linux ARM32 and ARM64
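Context for the pipeline change above: --compile-no-warning-as-error is a CMake 3.24+ configure-time flag that makes the build ignore the COMPILE_WARNING_AS_ERROR property, and it was serving as an escape hatch for exactly this warning in the 32-bit ARM cross-build; with the warning fixed, the TODO and the flag are dropped. A minimal sketch of the mechanism, with hypothetical project and file names:

```cmake
# Hypothetical CMakeLists.txt fragment: opt every target into warnings-as-errors.
cmake_minimum_required(VERSION 3.24)
project(warn_demo CXX)

# Initializes the COMPILE_WARNING_AS_ERROR property on targets created below.
set(CMAKE_COMPILE_WARNING_AS_ERROR ON)
add_executable(warn_demo main.cc)

# A configure run that must tolerate known warnings can override this without
# editing the project:
#   cmake --compile-no-warning-as-error <path-to-source>
```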