From c7dd6b48684028a65b1d19d5d5b04060f6a4fe19 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:15:31 -0400 Subject: [PATCH] Refactor libcudf indexalator to typed normalator (#14043) Creates generic normalizing-iterator for integer types for use by the `indexalator` and the future offsets normalizing iterator. Mostly code has been moved around or renamed so the normalizing-iterator part can take type template parameter to identify which integer type to normalize to. For the `indexalator`, this type is `cudf::size_type` and for the offsets iterator this type would be `int64`. Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) URL: https://github.com/rapidsai/cudf/pull/14043 --- cpp/include/cudf/detail/indexalator.cuh | 332 +--------------- .../cudf/detail/normalizing_iterator.cuh | 367 ++++++++++++++++++ 2 files changed, 374 insertions(+), 325 deletions(-) create mode 100644 cpp/include/cudf/detail/normalizing_iterator.cuh diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 4731c4919e3..6532dae3695 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -16,14 +16,13 @@ #pragma once +#include + #include #include #include #include -#include -#include -#include #include #include #include @@ -32,193 +31,6 @@ namespace cudf { namespace detail { -/** - * @brief The base class for the input or output index normalizing iterator. - * - * This implementation uses CRTP to define the `input_indexalator` and the - * `output_indexalator` classes. This is so this class can manipulate the - * uniquely typed subclass member variable `p_` directly without requiring - * virtual functions since iterator instances will be copied to device memory. 
- * - * The base class mainly manages updating the `p_` member variable while the - * subclasses handle accessing individual elements in device memory. - * - * @tparam T The derived class type for the iterator. - */ -template -struct base_indexalator { - using difference_type = ptrdiff_t; - using value_type = size_type; - using pointer = size_type*; - using iterator_category = std::random_access_iterator_tag; - - base_indexalator() = default; - base_indexalator(base_indexalator const&) = default; - base_indexalator(base_indexalator&&) = default; - base_indexalator& operator=(base_indexalator const&) = default; - base_indexalator& operator=(base_indexalator&&) = default; - - /** - * @brief Prefix increment operator. - */ - CUDF_HOST_DEVICE inline T& operator++() - { - T& derived = static_cast(*this); - derived.p_ += width_; - return derived; - } - - /** - * @brief Postfix increment operator. - */ - CUDF_HOST_DEVICE inline T operator++(int) - { - T tmp{static_cast(*this)}; - operator++(); - return tmp; - } - - /** - * @brief Prefix decrement operator. - */ - CUDF_HOST_DEVICE inline T& operator--() - { - T& derived = static_cast(*this); - derived.p_ -= width_; - return derived; - } - - /** - * @brief Postfix decrement operator. - */ - CUDF_HOST_DEVICE inline T operator--(int) - { - T tmp{static_cast(*this)}; - operator--(); - return tmp; - } - - /** - * @brief Compound assignment by sum operator. - */ - CUDF_HOST_DEVICE inline T& operator+=(difference_type offset) - { - T& derived = static_cast(*this); - derived.p_ += offset * width_; - return derived; - } - - /** - * @brief Increment by offset operator. - */ - CUDF_HOST_DEVICE inline T operator+(difference_type offset) const - { - auto tmp = T{static_cast(*this)}; - tmp.p_ += (offset * width_); - return tmp; - } - - /** - * @brief Addition assignment operator. 
- */ - CUDF_HOST_DEVICE inline friend T operator+(difference_type offset, T const& rhs) - { - T tmp{rhs}; - tmp.p_ += (offset * rhs.width_); - return tmp; - } - - /** - * @brief Compound assignment by difference operator. - */ - CUDF_HOST_DEVICE inline T& operator-=(difference_type offset) - { - T& derived = static_cast(*this); - derived.p_ -= offset * width_; - return derived; - } - - /** - * @brief Decrement by offset operator. - */ - CUDF_HOST_DEVICE inline T operator-(difference_type offset) const - { - auto tmp = T{static_cast(*this)}; - tmp.p_ -= (offset * width_); - return tmp; - } - - /** - * @brief Subtraction assignment operator. - */ - CUDF_HOST_DEVICE inline friend T operator-(difference_type offset, T const& rhs) - { - T tmp{rhs}; - tmp.p_ -= (offset * rhs.width_); - return tmp; - } - - /** - * @brief Compute offset from iterator difference operator. - */ - CUDF_HOST_DEVICE inline difference_type operator-(T const& rhs) const - { - return (static_cast(*this).p_ - rhs.p_) / width_; - } - - /** - * @brief Equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator==(T const& rhs) const - { - return rhs.p_ == static_cast(*this).p_; - } - /** - * @brief Not equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator!=(T const& rhs) const - { - return rhs.p_ != static_cast(*this).p_; - } - /** - * @brief Less than operator. - */ - CUDF_HOST_DEVICE inline bool operator<(T const& rhs) const - { - return static_cast(*this).p_ < rhs.p_; - } - /** - * @brief Greater than operator. - */ - CUDF_HOST_DEVICE inline bool operator>(T const& rhs) const - { - return static_cast(*this).p_ > rhs.p_; - } - /** - * @brief Less than or equals to operator. - */ - CUDF_HOST_DEVICE inline bool operator<=(T const& rhs) const - { - return static_cast(*this).p_ <= rhs.p_; - } - /** - * @brief Greater than or equals to operator. 
- */ - CUDF_HOST_DEVICE inline bool operator>=(T const& rhs) const - { - return static_cast(*this).p_ >= rhs.p_; - } - - protected: - /** - * @brief Constructor assigns width and type member variables for base class. - */ - base_indexalator(int32_t width, data_type dtype) : width_(width), dtype_(dtype) {} - - int width_; /// integer type width = 1,2,4, or 8 - data_type dtype_; /// for type-dispatcher calls -}; - /** * @brief The index normalizing input iterator. * @@ -244,65 +56,7 @@ struct base_indexalator { * auto result = thrust::find(thrust::device, begin, end, size_type{12} ); * @endcode */ -struct input_indexalator : base_indexalator { - friend struct indexalator_factory; - friend struct base_indexalator; // for CRTP - - using reference = size_type const; // this keeps STL and thrust happy - - input_indexalator() = default; - input_indexalator(input_indexalator const&) = default; - input_indexalator(input_indexalator&&) = default; - input_indexalator& operator=(input_indexalator const&) = default; - input_indexalator& operator=(input_indexalator&&) = default; - - /** - * @brief Indirection operator returns the value at the current iterator position. - */ - __device__ inline size_type operator*() const { return operator[](0); } - - /** - * @brief Dispatch functor for resolving a size_type value from any index type. - */ - struct index_as_size_type { - template ()>* = nullptr> - __device__ size_type operator()(void const* tp) - { - return static_cast(*static_cast(tp)); - } - template ()>* = nullptr> - __device__ size_type operator()(void const* tp) - { - CUDF_UNREACHABLE("only index types are supported"); - } - }; - /** - * @brief Array subscript operator returns a value at the input - * `idx` position as a `size_type` value. 
- */ - __device__ inline size_type operator[](size_type idx) const - { - void const* tp = p_ + (idx * width_); - return type_dispatcher(dtype_, index_as_size_type{}, tp); - } - - protected: - /** - * @brief Create an input index normalizing iterator. - * - * Use the indexalator_factory to create an iterator instance. - * - * @param data Pointer to an integer array in device memory. - * @param width The width of the integer type (1, 2, 4, or 8) - * @param data_type Index integer type of width `width` - */ - input_indexalator(void const* data, int width, data_type dtype) - : base_indexalator(width, dtype), p_{static_cast(data)} - { - } - - char const* p_; /// pointer to the integer data in device memory -}; +using input_indexalator = input_normalator; /** * @brief The index normalizing output iterator. @@ -328,79 +82,7 @@ struct input_indexalator : base_indexalator { * thrust::less()); * @endcode */ -struct output_indexalator : base_indexalator { - friend struct indexalator_factory; - friend struct base_indexalator; // for CRTP - - using reference = output_indexalator const&; // required for output iterators - - output_indexalator() = default; - output_indexalator(output_indexalator const&) = default; - output_indexalator(output_indexalator&&) = default; - output_indexalator& operator=(output_indexalator const&) = default; - output_indexalator& operator=(output_indexalator&&) = default; - - /** - * @brief Indirection operator returns this iterator instance in order - * to capture the `operator=(size_type)` calls. - */ - __device__ inline output_indexalator const& operator*() const { return *this; } - - /** - * @brief Array subscript operator returns an iterator instance at the specified `idx` position. - * - * This allows capturing the subsequent `operator=(size_type)` call in this class. 
- */ - __device__ inline output_indexalator const operator[](size_type idx) const - { - output_indexalator tmp{*this}; - tmp.p_ += (idx * width_); - return tmp; - } - - /** - * @brief Dispatch functor for setting the index value from a size_type value. - */ - struct size_type_to_index { - template ()>* = nullptr> - __device__ void operator()(void* tp, size_type const value) - { - (*static_cast(tp)) = static_cast(value); - } - template ()>* = nullptr> - __device__ void operator()(void* tp, size_type const value) - { - CUDF_UNREACHABLE("only index types are supported"); - } - }; - - /** - * @brief Assign a size_type value to the current iterator position. - */ - __device__ inline output_indexalator const& operator=(size_type const value) const - { - void* tp = p_; - type_dispatcher(dtype_, size_type_to_index{}, tp, value); - return *this; - } - - protected: - /** - * @brief Create an output index normalizing iterator. - * - * Use the indexalator_factory to create an iterator instance. - * - * @param data Pointer to an integer array in device memory. - * @param width The width of the integer type (1, 2, 4, or 8) - * @param data_type Index integer type of width `width` - */ - output_indexalator(void* data, int width, data_type dtype) - : base_indexalator(width, dtype), p_{static_cast(data)} - { - } - - char* p_; /// pointer to the integer data in device memory -}; +using output_indexalator = output_normalator; /** * @brief Use this class to create an indexalator instance. 
@@ -413,7 +95,7 @@ struct indexalator_factory { template ()>* = nullptr> input_indexalator operator()(column_view const& indices) { - return input_indexalator(indices.data(), sizeof(IndexType), indices.type()); + return input_indexalator(indices.data(), indices.type()); } template const&>(index) creates a copy auto const scalar_impl = static_cast const*>(&index); - return input_indexalator(scalar_impl->data(), sizeof(IndexType), index.type()); + return input_indexalator(scalar_impl->data(), index.type()); } template ()>* = nullptr> output_indexalator operator()(mutable_column_view const& indices) { - return output_indexalator(indices.data(), sizeof(IndexType), indices.type()); + return output_indexalator(indices.data(), indices.type()); } template + +#include + +namespace cudf { +namespace detail { + +/** + * @brief The base class for the input or output normalizing iterator + * + * The base class mainly manages updating the `p_` member variable while the + * subclasses handle accessing individual elements in device memory. + * + * @tparam Derived The derived class type for the iterator + * @tparam Integer The type the iterator normalizes to + */ +template +struct base_normalator { + static_assert(std::is_integral_v); + using difference_type = std::ptrdiff_t; + using value_type = Integer; + using pointer = Integer*; + using iterator_category = std::random_access_iterator_tag; + + base_normalator() = default; + base_normalator(base_normalator const&) = default; + base_normalator(base_normalator&&) = default; + base_normalator& operator=(base_normalator const&) = default; + base_normalator& operator=(base_normalator&&) = default; + + /** + * @brief Prefix increment operator. + */ + CUDF_HOST_DEVICE inline Derived& operator++() + { + Derived& derived = static_cast(*this); + derived.p_ += width_; + return derived; + } + + /** + * @brief Postfix increment operator. 
+   */
+  CUDF_HOST_DEVICE inline Derived operator++(int)
+  {
+    Derived tmp{static_cast<Derived&>(*this)};  // copy of the current position, returned to caller
+    operator++();
+    return tmp;
+  }
+
+  /**
+   * @brief Prefix decrement operator.
+   */
+  CUDF_HOST_DEVICE inline Derived& operator--()
+  {
+    Derived& derived = static_cast<Derived&>(*this);
+    derived.p_ -= width_;  // step back by one element of `width_` bytes
+    return derived;
+  }
+
+  /**
+   * @brief Postfix decrement operator.
+   */
+  CUDF_HOST_DEVICE inline Derived operator--(int)
+  {
+    Derived tmp{static_cast<Derived&>(*this)};
+    operator--();
+    return tmp;
+  }
+
+  /**
+   * @brief Compound assignment by sum operator.
+   */
+  CUDF_HOST_DEVICE inline Derived& operator+=(difference_type offset)
+  {
+    Derived& derived = static_cast<Derived&>(*this);
+    derived.p_ += offset * width_;  // `offset` counts elements; scale by the element width
+    return derived;
+  }
+
+  /**
+   * @brief Increment by offset operator.
+   */
+  CUDF_HOST_DEVICE inline Derived operator+(difference_type offset) const
+  {
+    auto tmp = Derived{static_cast<Derived const&>(*this)};
+    tmp.p_ += (offset * width_);
+    return tmp;
+  }
+
+  /**
+   * @brief Addition operator with the offset on the left-hand side (`offset + iterator`).
+   */
+  CUDF_HOST_DEVICE inline friend Derived operator+(difference_type offset, Derived const& rhs)
+  {
+    Derived tmp{rhs};
+    tmp.p_ += (offset * rhs.width_);
+    return tmp;
+  }
+
+  /**
+   * @brief Compound assignment by difference operator.
+   */
+  CUDF_HOST_DEVICE inline Derived& operator-=(difference_type offset)
+  {
+    Derived& derived = static_cast<Derived&>(*this);
+    derived.p_ -= offset * width_;
+    return derived;
+  }
+
+  /**
+   * @brief Decrement by offset operator.
+   */
+  CUDF_HOST_DEVICE inline Derived operator-(difference_type offset) const
+  {
+    auto tmp = Derived{static_cast<Derived const&>(*this)};
+    tmp.p_ -= (offset * width_);
+    return tmp;
+  }
+
+  /**
+   * @brief Subtraction with the offset on the left-hand side; returns `rhs` moved back by `offset`.
+   */
+  CUDF_HOST_DEVICE inline friend Derived operator-(difference_type offset, Derived const& rhs)
+  {
+    Derived tmp{rhs};
+    tmp.p_ -= (offset * rhs.width_);
+    return tmp;
+  }
+
+  /**
+   * @brief Compute offset from iterator difference operator.
+   */
+  CUDF_HOST_DEVICE inline difference_type operator-(Derived const& rhs) const
+  {
+    // pointer difference divided by the element width gives a distance in elements
+    return (static_cast<Derived const&>(*this).p_ - rhs.p_) / width_;
+  }
+
+  /**
+   * @brief Equals to operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator==(Derived const& rhs) const
+  {
+    return rhs.p_ == static_cast<Derived const&>(*this).p_;
+  }
+
+  /**
+   * @brief Not equals to operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator!=(Derived const& rhs) const
+  {
+    return rhs.p_ != static_cast<Derived const&>(*this).p_;
+  }
+
+  /**
+   * @brief Less than operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator<(Derived const& rhs) const
+  {
+    return static_cast<Derived const&>(*this).p_ < rhs.p_;
+  }
+
+  /**
+   * @brief Greater than operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator>(Derived const& rhs) const
+  {
+    return static_cast<Derived const&>(*this).p_ > rhs.p_;
+  }
+
+  /**
+   * @brief Less than or equals to operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator<=(Derived const& rhs) const
+  {
+    return static_cast<Derived const&>(*this).p_ <= rhs.p_;
+  }
+
+  /**
+   * @brief Greater than or equals to operator.
+   */
+  CUDF_HOST_DEVICE inline bool operator>=(Derived const& rhs) const
+  {
+    return static_cast<Derived const&>(*this).p_ >= rhs.p_;
+  }
+
+ protected:
+  /**
+   * @brief Constructor assigns width and type member variables for base class.
+   */
+  explicit base_normalator(data_type dtype) : width_(size_of(dtype)), dtype_(dtype) {}
+
+  int width_;        /// integer type width = 1,2,4, or 8
+  data_type dtype_;  /// for type-dispatcher calls
+};
+
+/**
+ * @brief The integer normalizing input iterator
+ *
+ * This is an iterator that can be used for index types (integers) without
+ * requiring a type-specific instance. It can be used for any iterator
+ * interface for reading an array of integer values of type
+ * int8, int16, int32, int64, uint8, uint16, uint32, or uint64.
+ * Reading specific elements always returns a value of type `Integer`
+ *
+ * @tparam Integer Type returned by all read functions
+ */
+template <typename Integer>
+struct input_normalator : base_normalator<input_normalator<Integer>, Integer> {
+  friend struct base_normalator<input_normalator<Integer>, Integer>;  // for CRTP
+
+  using reference = Integer const;  // this keeps STL and thrust happy
+
+  input_normalator() = default;
+  input_normalator(input_normalator const&) = default;
+  input_normalator(input_normalator&&) = default;
+  input_normalator& operator=(input_normalator const&) = default;
+  input_normalator& operator=(input_normalator&&) = default;
+
+  /**
+   * @brief Indirection operator returns the value at the current iterator position
+   */
+  __device__ inline Integer operator*() const { return operator[](0); }
+
+  /**
+   * @brief Dispatch functor for resolving an Integer value from any integer type
+   */
+  struct normalize_type {
+    template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
+    __device__ Integer operator()(void const* tp)
+    {
+      return static_cast<Integer>(*static_cast<T const*>(tp));
+    }
+    template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
+    __device__ Integer operator()(void const*)
+    {
+      CUDF_UNREACHABLE("only integral types are supported");
+    }
+  };
+
+  /**
+   * @brief Array subscript operator returns a value at the input
+   * `idx` position as an `Integer` value.
+   */
+  __device__ inline Integer operator[](size_type idx) const
+  {
+    // Widen before scaling: `idx * width_` evaluated in 32-bit int can overflow
+    // once idx >= 2^28 with 8-byte elements.
+    void const* tp = p_ + (static_cast<std::ptrdiff_t>(idx) * this->width_);
+    return type_dispatcher(this->dtype_, normalize_type{}, tp);
+  }
+
+  /**
+   * @brief Create an input index normalizing iterator.
+   *
+   * Use the indexalator_factory to create an iterator instance.
+   *
+   * @param data Pointer to an integer array in device memory.
+   * @param dtype Type of data in data
+   */
+  input_normalator(void const* data, data_type dtype)
+    : base_normalator<input_normalator<Integer>, Integer>(dtype), p_{static_cast<char const*>(data)}
+  {
+  }
+
+  char const* p_;  /// pointer to the integer data in device memory
+};
+
+/**
+ * @brief The integer normalizing output iterator
+ *
+ * This is an iterator that can be used for index types (integers) without
+ * requiring a type-specific instance. It can be used for any iterator
+ * interface for writing an array of integer values of type
+ * int8, int16, int32, int64, uint8, uint16, uint32, or uint64.
+ * Setting specific elements always accept the `Integer` type values.
+ *
+ * @tparam Integer The type used for all write functions
+ */
+template <typename Integer>
+struct output_normalator : base_normalator<output_normalator<Integer>, Integer> {
+  friend struct base_normalator<output_normalator<Integer>, Integer>;  // for CRTP
+
+  using reference = output_normalator const&;  // required for output iterators
+
+  output_normalator() = default;
+  output_normalator(output_normalator const&) = default;
+  output_normalator(output_normalator&&) = default;
+  output_normalator& operator=(output_normalator const&) = default;
+  output_normalator& operator=(output_normalator&&) = default;
+
+  /**
+   * @brief Indirection operator returns this iterator instance in order
+   * to capture the `operator=(Integer)` calls.
+   */
+  __device__ inline output_normalator const& operator*() const { return *this; }
+
+  /**
+   * @brief Array subscript operator returns an iterator instance at the specified `idx` position.
+   *
+   * This allows capturing the subsequent `operator=(Integer)` call in this class.
+   */
+  __device__ inline output_normalator const operator[](size_type idx) const
+  {
+    output_normalator tmp{*this};
+    // Widen before scaling: `idx * width_` evaluated in 32-bit int can overflow
+    // once idx >= 2^28 with 8-byte elements.
+    tmp.p_ += (static_cast<std::ptrdiff_t>(idx) * this->width_);
+    return tmp;
+  }
+
+  /**
+   * @brief Dispatch functor for setting the stored value from an Integer value.
+   */
+  struct normalize_type {
+    template <typename T, std::enable_if_t<std::is_integral_v<T>>* = nullptr>
+    __device__ void operator()(void* tp, Integer const value)
+    {
+      (*static_cast<T*>(tp)) = static_cast<T>(value);
+    }
+    template <typename T, std::enable_if_t<not std::is_integral_v<T>>* = nullptr>
+    __device__ void operator()(void*, Integer const)
+    {
+      CUDF_UNREACHABLE("only integral types are supported");
+    }
+  };
+
+  /**
+   * @brief Assign an Integer value to the current iterator position
+   */
+  __device__ inline output_normalator const& operator=(Integer const value) const
+  {
+    void* tp = p_;
+    type_dispatcher(this->dtype_, normalize_type{}, tp, value);
+    return *this;
+  }
+
+  /**
+   * @brief Create an output normalizing iterator
+   *
+   * @param data Pointer to an integer array in device memory.
+   * @param dtype Type of data in data
+   */
+  output_normalator(void* data, data_type dtype)
+    : base_normalator<output_normalator<Integer>, Integer>(dtype), p_{static_cast<char*>(data)}
+  {
+  }
+
+  char* p_;  /// pointer to the integer data in device memory
+};
+
+}  // namespace detail
+}  // namespace cudf