Skip to content

Commit

Permalink
more refactor for Arm64
Browse files (browse the repository at this point in the history)
Signed-off-by: Liqun Fu <[email protected]>
  • Loading branch information
liqunfu committed Jul 26, 2024
1 parent 51e97c8 commit 48e8639
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 1 deletion.
1 change: 0 additions & 1 deletion onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ Status MatMulNBits::UseSharedPrePackedBuffers(std::vector<BufferUniquePtr>& prep
}

Status MatMulNBits::Compute(OpKernelContext* ctx) const {
//auto start = std::chrono::high_resolution_clock::now(); // Start timing here
concurrency::ThreadPool* thread_pool = ctx->GetOperatorThreadPool();
const Tensor* a = ctx->Input<Tensor>(InputIndex::A);
const auto* a_data = a->Data<float>();
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/mlas/lib/sqnbitgemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ SQ4BitGemm_CompInt8(
RowsRemaining -= RowsHandled;
}
}
#ifdef MLAS_TARGET_AMD64_IX86
else if (GetMlasPlatform().SQNBitGemmDispatch->SQ4BitGemmKernel_BlkSum_CompInt8 != nullptr)
{
const float* b_blk_sum = QuantBBlkSum + n * k_blks;
Expand Down Expand Up @@ -562,6 +563,7 @@ SQ4BitGemm_CompInt8(
);
}
}
#endif
}
}

Expand Down

0 comments on commit 48e8639

Please sign in to comment.