From f69b3b8f6eaec768866517be04f87719e595cf68 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 25 Sep 2024 13:40:13 +0200 Subject: [PATCH] Remove the need for ChunkResolver --- .../arrow/compute/kernels/chunked_internal.cc | 23 ++++++++++--------- .../arrow/compute/kernels/chunked_internal.h | 9 ++++---- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.cc b/cpp/src/arrow/compute/kernels/chunked_internal.cc index d1b234bfc48ef..8932f60dc540a 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.cc +++ b/cpp/src/arrow/compute/kernels/chunked_internal.cc @@ -68,24 +68,25 @@ ChunkedIndexMapper::LogicalToPhysical() { } } - constexpr int64_t kMaxBatchSize = 512; - std::array, kMaxBatchSize> batch; - const int64_t num_indices = static_cast(indices_end_ - indices_begin_); ResolvedChunkIndex* physical_begin = reinterpret_cast(indices_begin_); DCHECK_EQ(physical_begin + num_indices, reinterpret_cast(indices_end_)); - for (int64_t i = 0; i < num_indices; i += kMaxBatchSize) { - const int64_t batch_size = std::min(kMaxBatchSize, num_indices - i); - [[maybe_unused]] bool ok = - resolver_.ResolveMany(batch_size, indices_begin_ + i, batch.data()); - DCHECK(ok) << "ResolveMany unexpectedly failed (invalid logical index?)"; - for (int64_t j = 0; j < batch_size; ++j) { - const auto loc = batch[j]; - physical_begin[i + j] = ResolvedChunkIndex{loc.chunk_index, loc.index_in_chunk}; + int64_t chunk_offset = 0; + for (int64_t chunk_index = 0; chunk_index < static_cast(chunk_lengths_.size()); + ++chunk_index) { + const int64_t chunk_length = chunk_lengths_[chunk_index]; + for (int64_t i = 0; i < chunk_length; ++i) { + DCHECK_GE(indices_begin_[chunk_offset + i], static_cast(chunk_offset)); + DCHECK_LT(indices_begin_[chunk_offset + i], + static_cast(chunk_offset + chunk_length)); + physical_begin[chunk_offset + i] = ResolvedChunkIndex{ + static_cast(chunk_index), + indices_begin_[chunk_offset + i] - static_cast(chunk_offset)}; } + chunk_offset += chunk_length; } return std::pair{physical_begin, physical_begin + num_indices}; diff --git a/cpp/src/arrow/compute/kernels/chunked_internal.h b/cpp/src/arrow/compute/kernels/chunked_internal.h index abb2ae6a47466..f9c86d3ecc34e 100644 --- a/cpp/src/arrow/compute/kernels/chunked_internal.h +++ b/cpp/src/arrow/compute/kernels/chunked_internal.h @@ -133,20 +133,20 @@ class ChunkedIndexMapper { : ChunkedIndexMapper(util::span(chunks), indices_begin, indices_end) {} ChunkedIndexMapper(util::span chunks, uint64_t* indices_begin, uint64_t* indices_end) - : resolver_(chunks), - chunk_lengths_(GetChunkLengths(chunks)), + : chunk_lengths_(GetChunkLengths(chunks)), indices_begin_(indices_begin), indices_end_(indices_end) {} ChunkedIndexMapper(const RecordBatchVector& chunks, uint64_t* indices_begin, uint64_t* indices_end) - : resolver_(chunks), - chunk_lengths_(GetChunkLengths(chunks)), + : chunk_lengths_(GetChunkLengths(chunks)), indices_begin_(indices_begin), indices_end_(indices_end) {} // Turn the original uint64_t logical indices into physical. This reuses the // same memory area, so the logical indices cannot be used anymore until // PhysicalToLogical() is called. + // + // This assumes that the logical indices are originally chunk-partitioned. Result> LogicalToPhysical(); // Turn the physical indices back into logical, making the uint64_t indices @@ -157,7 +157,6 @@ class ChunkedIndexMapper { static std::vector GetChunkLengths(util::span chunks); static std::vector GetChunkLengths(const RecordBatchVector& chunks); - ChunkResolver resolver_; std::vector chunk_lengths_; uint64_t* indices_begin_; uint64_t* indices_end_;