Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-34535: [C++] Move ChunkResolver to the public API #44357

Merged
merged 11 commits into from
Oct 21, 2024
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunk_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "arrow/array.h"
#include "arrow/record_batch.h"

namespace arrow::internal {
namespace arrow {

namespace {
template <typename T>
Expand Down Expand Up @@ -167,4 +167,4 @@ void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_i
logical_index_vec, out_chunk_location_vec, chunk_hint);
}

} // namespace arrow::internal
} // namespace arrow
41 changes: 29 additions & 12 deletions cpp/src/arrow/chunk_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"

namespace arrow::internal {
namespace arrow {

struct ChunkResolver;
class ChunkResolver;

template <typename IndexType>
struct TypedChunkLocation {
struct ARROW_EXPORT TypedChunkLocation {
/// \brief Index of the chunk in the array of chunks
///
/// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
Expand All @@ -41,7 +41,7 @@ struct TypedChunkLocation {

/// \brief Index of the value in the chunk
///
/// The value is UNDEFINED if chunk_index >= chunks.size()
/// The value is UNDEFINED if `chunk_index >= chunks.size()`
IndexType index_in_chunk = 0;

TypedChunkLocation() = default;
Expand All @@ -61,7 +61,7 @@ using ChunkLocation = TypedChunkLocation<int64_t>;

/// \brief An utility that incrementally resolves logical indices into
/// physical indices in a chunked array.
struct ARROW_EXPORT ChunkResolver {
class ARROW_EXPORT ChunkResolver {
private:
/// \brief Array containing `chunks.size() + 1` offsets.
///
Expand All @@ -75,8 +75,16 @@ struct ARROW_EXPORT ChunkResolver {
mutable std::atomic<int32_t> cached_chunk_;

public:
/// \brief Initialize from an `ArrayVector`.
explicit ChunkResolver(const ArrayVector& chunks) noexcept;

/// \brief Initialize from a vector of raw `Array` pointers.
explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved

/// \brief Initialize from a `RecordBatchVector`.
///
/// Because all `Array`s in a `RecordBatch` must have the same length, this
/// can be useful for iterating over multiple columns simultaneously.
explicit ChunkResolver(const RecordBatchVector& batches) noexcept;

/// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets.
Expand Down Expand Up @@ -115,11 +123,11 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation Resolve(int64_t index) const {
const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed);
Expand All @@ -133,13 +141,13 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \param hint ChunkLocation{} or the last ChunkLocation returned by
/// this ChunkResolver.
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const {
assert(hint.chunk_index < static_cast<uint32_t>(offsets_.size()));
Expand Down Expand Up @@ -281,4 +289,13 @@ struct ARROW_EXPORT ChunkResolver {
}
};

} // namespace arrow::internal
// Explicitly instantiate template base struct, for DLL linking on Windows
template struct arrow::TypedChunkLocation<int32_t>;
template struct arrow::TypedChunkLocation<int16_t>;
template struct arrow::TypedChunkLocation<int8_t>;
template struct arrow::TypedChunkLocation<uint8_t>;
template struct arrow::TypedChunkLocation<uint16_t>;
template struct arrow::TypedChunkLocation<uint32_t>;
template struct arrow::TypedChunkLocation<int64_t>;
template struct arrow::TypedChunkLocation<uint64_t>;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved
} // namespace arrow
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunk_resolver_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

namespace arrow {

using internal::ChunkResolver;
using internal::TypedChunkLocation;
using arrow::ChunkResolver;
using arrow::TypedChunkLocation;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved

namespace {

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class ARROW_EXPORT ChunkedArray {
private:
template <typename T, typename V>
friend class ::arrow::stl::ChunkedArrayIterator;
internal::ChunkResolver chunk_resolver_;
ChunkResolver chunk_resolver_;
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@

namespace arrow {

using internal::ChunkLocation;
using internal::ChunkResolver;
using internal::TypedChunkLocation;
using arrow::ChunkLocation;
using arrow::ChunkResolver;
using arrow::TypedChunkLocation;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved

class TestChunkedArray : public ::testing::Test {
protected:
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/chunked_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct ResolvedChunk {

class ChunkedArrayResolver {
private:
::arrow::internal::ChunkResolver resolver_;
ChunkResolver resolver_;
std::vector<const Array*> chunks_;

public:
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

namespace arrow {

using arrow::ChunkLocation;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved
using internal::checked_cast;
using internal::ChunkLocation;

namespace compute {
namespace internal {
Expand Down Expand Up @@ -852,7 +852,7 @@ class TableSorter {
const RecordBatchVector batches_;
const SortOptions& options_;
const NullPlacement null_placement_;
const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
const ::arrow::ChunkResolver left_resolver_, right_resolver_;
const std::vector<ResolvedSortKey> sort_keys_;
uint64_t* indices_begin_;
uint64_t* indices_end_;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -749,9 +749,9 @@ struct ResolvedTableSortKey {
order(order),
null_count(null_count) {}

using LocationType = ::arrow::internal::ChunkLocation;
using LocationType = ::arrow::ChunkLocation;

ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const {
ResolvedChunk GetChunk(::arrow::ChunkLocation loc) const {
return {chunks[loc.chunk_index], loc.index_in_chunk};
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/stl_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class ChunkedArrayIterator {
}

private:
arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
arrow::ChunkLocation GetChunkLocation(int64_t index) const {
assert(chunked_array_);
return chunked_array_->chunk_resolver_.Resolve(index);
}
Expand Down
14 changes: 14 additions & 0 deletions docs/source/cpp/api/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
Arrays
======

Base classes
============

.. doxygenclass:: arrow::ArrayData
:project: arrow_cpp
:members:
Expand Down Expand Up @@ -85,6 +88,17 @@ Chunked Arrays
:project: arrow_cpp
:members:

.. doxygenstruct:: arrow::ChunkLocation
:project: arrow_cpp
:members:

.. doxygenstruct:: arrow::TypedChunkLocation
:project: arrow_cpp
:members:

.. doxygenclass:: arrow::ChunkResolver
:project: arrow_cpp
:members:

Utilities
=========
Expand Down
6 changes: 2 additions & 4 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,13 +87,11 @@ class ArrowAltrepData {

const std::shared_ptr<ChunkedArray>& chunked_array() { return chunked_array_; }

arrow::internal::ChunkLocation locate(int64_t index) {
return resolver_.Resolve(index);
}
arrow::ChunkLocation locate(int64_t index) { return resolver_.Resolve(index); }

private:
std::shared_ptr<ChunkedArray> chunked_array_;
arrow::internal::ChunkResolver resolver_;
arrow::ChunkResolver resolver_;
amoeba marked this conversation as resolved.
Show resolved Hide resolved
};

// the ChunkedArray that is being wrapped by the altrep object
Expand Down
Loading