Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-34535: [C++] Move ChunkResolver to the public API #44357

Merged
merged 11 commits into from
Oct 21, 2024
4 changes: 2 additions & 2 deletions cpp/src/arrow/chunk_resolver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "arrow/array.h"
#include "arrow/record_batch.h"

namespace arrow::internal {
namespace arrow {

namespace {
template <typename T>
Expand Down Expand Up @@ -167,4 +167,4 @@ void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_i
logical_index_vec, out_chunk_location_vec, chunk_hint);
}

} // namespace arrow::internal
} // namespace arrow
40 changes: 23 additions & 17 deletions cpp/src/arrow/chunk_resolver.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"

namespace arrow::internal {
namespace arrow {

struct ChunkResolver;
class ChunkResolver;

template <typename IndexType>
struct TypedChunkLocation {
struct ARROW_EXPORT TypedChunkLocation {
/// \brief Index of the chunk in the array of chunks
///
/// The value is always in the range `[0, chunks.size()]`. `chunks.size()` is used
Expand All @@ -41,7 +41,7 @@ struct TypedChunkLocation {

/// \brief Index of the value in the chunk
///
/// The value is UNDEFINED if chunk_index >= chunks.size()
/// The value is UNDEFINED if `chunk_index >= chunks.size()`
IndexType index_in_chunk = 0;

TypedChunkLocation() = default;
Expand All @@ -61,7 +61,7 @@ using ChunkLocation = TypedChunkLocation<int64_t>;

/// \brief An utility that incrementally resolves logical indices into
/// physical indices in a chunked array.
struct ARROW_EXPORT ChunkResolver {
class ARROW_EXPORT ChunkResolver {
private:
/// \brief Array containing `chunks.size() + 1` offsets.
///
Expand All @@ -76,14 +76,11 @@ struct ARROW_EXPORT ChunkResolver {

public:
explicit ChunkResolver(const ArrayVector& chunks) noexcept;

explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept;
anjakefala marked this conversation as resolved.
Show resolved Hide resolved

explicit ChunkResolver(const RecordBatchVector& batches) noexcept;

/// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets.
///
/// The first offset must be 0 and the last offset must be the logical length of the
/// chunked array. Each offset before the last represents the starting logical index of
/// the corresponding chunk.
explicit ChunkResolver(std::vector<int64_t> offsets) noexcept
: offsets_(std::move(offsets)), cached_chunk_(0) {
#ifndef NDEBUG
Expand Down Expand Up @@ -115,11 +112,11 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation Resolve(int64_t index) const {
const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed);
Expand All @@ -133,13 +130,13 @@ struct ARROW_EXPORT ChunkResolver {
/// The returned ChunkLocation contains the chunk index and the within-chunk index
/// equivalent to the logical index.
///
/// \pre index >= 0
/// \post location.chunk_index in [0, chunks.size()]
/// \pre `index >= 0`
/// \post `location.chunk_index` in `[0, chunks.size()]`
/// \param index The logical index to resolve
/// \param hint ChunkLocation{} or the last ChunkLocation returned by
/// this ChunkResolver.
/// \return ChunkLocation with a valid chunk_index if index is within
/// bounds, or with chunk_index == chunks.size() if logical index is
/// bounds, or with `chunk_index == chunks.size()` if logical index is
/// `>= chunked_array.length()`.
inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const {
assert(hint.chunk_index < static_cast<uint32_t>(offsets_.size()));
Expand Down Expand Up @@ -281,4 +278,13 @@ struct ARROW_EXPORT ChunkResolver {
}
};

} // namespace arrow::internal
// Explicitly instantiate template base struct, for DLL linking on Windows
template struct TypedChunkLocation<int32_t>;
template struct TypedChunkLocation<int16_t>;
template struct TypedChunkLocation<int8_t>;
template struct TypedChunkLocation<uint8_t>;
template struct TypedChunkLocation<uint16_t>;
template struct TypedChunkLocation<uint32_t>;
template struct TypedChunkLocation<int64_t>;
template struct TypedChunkLocation<uint64_t>;
} // namespace arrow
3 changes: 0 additions & 3 deletions cpp/src/arrow/chunk_resolver_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,6 @@

namespace arrow {

using internal::ChunkResolver;
using internal::TypedChunkLocation;

namespace {

int64_t constexpr kChunkedArrayLength = std::numeric_limits<uint16_t>::max();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/chunked_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class ARROW_EXPORT ChunkedArray {
private:
template <typename T, typename V>
friend class ::arrow::stl::ChunkedArrayIterator;
internal::ChunkResolver chunk_resolver_;
ChunkResolver chunk_resolver_;
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
};

Expand Down
4 changes: 0 additions & 4 deletions cpp/src/arrow/chunked_array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@

namespace arrow {

using internal::ChunkLocation;
using internal::ChunkResolver;
using internal::TypedChunkLocation;

class TestChunkedArray : public ::testing::Test {
protected:
virtual void Construct() {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/chunked_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct ResolvedChunk {

class ChunkedArrayResolver {
private:
::arrow::internal::ChunkResolver resolver_;
ChunkResolver resolver_;
std::vector<const Array*> chunks_;

public:
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/compute/kernels/vector_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
namespace arrow {

using internal::checked_cast;
using internal::ChunkLocation;

namespace compute {
namespace internal {
Expand Down Expand Up @@ -852,7 +851,7 @@ class TableSorter {
const RecordBatchVector batches_;
const SortOptions& options_;
const NullPlacement null_placement_;
const ::arrow::internal::ChunkResolver left_resolver_, right_resolver_;
const ::arrow::ChunkResolver left_resolver_, right_resolver_;
const std::vector<ResolvedSortKey> sort_keys_;
uint64_t* indices_begin_;
uint64_t* indices_end_;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_sort_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -749,9 +749,9 @@ struct ResolvedTableSortKey {
order(order),
null_count(null_count) {}

using LocationType = ::arrow::internal::ChunkLocation;
using LocationType = ::arrow::ChunkLocation;

ResolvedChunk GetChunk(::arrow::internal::ChunkLocation loc) const {
ResolvedChunk GetChunk(::arrow::ChunkLocation loc) const {
return {chunks[loc.chunk_index], loc.index_in_chunk};
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/stl_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ class ChunkedArrayIterator {
}

private:
arrow::internal::ChunkLocation GetChunkLocation(int64_t index) const {
arrow::ChunkLocation GetChunkLocation(int64_t index) const {
assert(chunked_array_);
return chunked_array_->chunk_resolver_.Resolve(index);
}
Expand Down
14 changes: 14 additions & 0 deletions docs/source/cpp/api/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
Arrays
======

Base classes
============

.. doxygenclass:: arrow::ArrayData
:project: arrow_cpp
:members:
Expand Down Expand Up @@ -85,6 +88,17 @@ Chunked Arrays
:project: arrow_cpp
:members:

.. doxygenstruct:: arrow::ChunkLocation
:project: arrow_cpp
:members:

.. doxygenstruct:: arrow::TypedChunkLocation
:project: arrow_cpp
:members:

.. doxygenclass:: arrow::ChunkResolver
:project: arrow_cpp
:members:

Utilities
=========
Expand Down
14 changes: 10 additions & 4 deletions r/src/altrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,26 @@ void DeletePointer(std::shared_ptr<T>* ptr) {
template <typename T>
using Pointer = cpp11::external_pointer<std::shared_ptr<T>, DeletePointer<T>>;

#if ARROW_MAJOR_VERSION >= 18
anjakefala marked this conversation as resolved.
Show resolved Hide resolved
using ChunkResolver = arrow::ChunkResolver;
using ChunkLocation = arrow::ChunkLocation;
#else
using ChunkResolver = arrow::internal::ChunkResolver;
using ChunkLocation = arrow::internal::ChunkLocation;
#endif

class ArrowAltrepData {
public:
explicit ArrowAltrepData(const std::shared_ptr<ChunkedArray>& chunked_array)
: chunked_array_(chunked_array), resolver_(chunked_array->chunks()) {}

const std::shared_ptr<ChunkedArray>& chunked_array() { return chunked_array_; }

arrow::internal::ChunkLocation locate(int64_t index) {
return resolver_.Resolve(index);
}
ChunkLocation locate(int64_t index) { return resolver_.Resolve(index); }

private:
std::shared_ptr<ChunkedArray> chunked_array_;
arrow::internal::ChunkResolver resolver_;
ChunkResolver resolver_;
};

// the ChunkedArray that is being wrapped by the altrep object
Expand Down
Loading