From e385fda9173f99b18f2d5c0cb87eb4b3e9c9e2f4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Sep 2023 14:25:32 -0700 Subject: [PATCH 001/100] Add `COUNT_FREQUENCY` and `MERGE_FREQUENCY` aggregations --- cpp/include/cudf/aggregation.hpp | 70 ++++++++++++++++---------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index d319041f8b1..12c6dc1cad7 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -83,40 +83,42 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< sum of squares of differences from the mean - VARIANCE, ///< variance - STD, ///< standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - RANK, ///< get rank of current index - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_M2, ///< merge partial values of M2 aggregation, - COVARIANCE, ///< covariance between two sets of elements - CORRELATION, ///< correlation between two sets of elements - TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST ///< create a tdigest by merging multiple tdigests together + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + COUNT_FREQUENCY, ///< count frequency of each element + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + RANK, ///< get rank of current index + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2, ///< merge partial values of M2 aggregation, + MERGE_FREQUENCY, ///< merge partial values of COUNT_FREQUENCY aggregation, + COVARIANCE, ///< covariance between two sets of elements + CORRELATION, ///< correlation between two sets of elements + TDIGEST, ///< create a tdigest from a set of input values + MERGE_TDIGEST ///< create a tdigest by merging multiple tdigests together }; aggregation() = delete; From e3df8d465cdcda5f4ba59e5c233999797fe6f916 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Sep 2023 16:57:04 -0700 Subject: [PATCH 002/100] Change the new aggregations to `HISTOGRAM` and `MERGE_HISTOGRAM` --- cpp/include/cudf/aggregation.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 12c6dc1cad7..8645247f298 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -89,7 +89,7 @@ class aggregation { MAX, ///< max reduction COUNT_VALID, ///< count number of valid elements COUNT_ALL, ///< count number of elements - COUNT_FREQUENCY, ///< count frequency of each element + HISTOGRAM, ///< compute frequency of each element ANY, ///< any reduction ALL, ///< all reduction SUM_OF_SQUARES, ///< sum of squares reduction @@ -114,7 +114,7 @@ class aggregation { MERGE_LISTS, ///< merge multiple lists values into one list MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries MERGE_M2, ///< merge partial values of M2 aggregation, - MERGE_FREQUENCY, ///< merge partial values of COUNT_FREQUENCY aggregation, + MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation, COVARIANCE, ///< covariance between two sets of elements CORRELATION, ///< correlation between two sets of elements TDIGEST, ///< create a tdigest from a set of input values From 7bc7f91566892c565002d709b67feace105f768e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Sep 2023 21:03:09 -0700 Subject: [PATCH 003/100] Update copyright year --- cpp/include/cudf/aggregation.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 8645247f298..359c53dff60 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 0fd200085d331e2a9412ddbca47bb6b163cd827a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 6 Sep 2023 22:02:24 -0700 Subject: [PATCH 004/100] Implement interface for the new aggregations --- .../cudf/detail/aggregation/aggregation.hpp | 44 +++++++++++++++++++ cpp/src/aggregation/aggregation.cpp | 42 ++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 4d3984cab93..345977384f3 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -45,6 +45,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class max_aggregation const& agg); virtual std::vector> visit(data_type col_type, class count_aggregation const& agg); + virtual std::vector> visit(data_type col_type, + class histogram_aggregation const& agg); virtual std::vector> visit(data_type col_type, class any_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -89,6 +91,8 @@ class simple_aggregations_collector { // Declares the interface for the simple class merge_sets_aggregation const& agg); virtual std::vector> visit(data_type col_type, class merge_m2_aggregation const& agg); + virtual std::vector> visit( + data_type col_type, class merge_histogram_aggregation const& agg); virtual std::vector> visit(data_type col_type, class covariance_aggregation const& agg); virtual std::vector> visit(data_type col_type, @@ -108,6 +112,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class min_aggregation const& agg); virtual void visit(class max_aggregation const& agg); virtual void visit(class count_aggregation const& agg); + virtual void visit(class histogram_aggregation const& agg); virtual void visit(class any_aggregation const& agg); virtual void visit(class all_aggregation const& agg); virtual void visit(class sum_of_squares_aggregation const& agg); @@ -130,6 +135,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer virtual void visit(class merge_lists_aggregation const& agg); virtual void visit(class merge_sets_aggregation const& agg); virtual void visit(class merge_m2_aggregation const& agg); + virtual void visit(class merge_histogram_aggregation const& agg); virtual void visit(class covariance_aggregation const& agg); virtual void visit(class correlation_aggregation const& agg); virtual void visit(class tdigest_aggregation const& agg); @@ -251,6 +257,25 @@ class count_aggregation final : public rolling_aggregation, void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived class for specifying a histogram aggregation + */ +class histogram_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + histogram_aggregation() : aggregation(HISTOGRAM) {} + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived class for specifying an any aggregation */ @@ -972,6 +997,25 @@ class merge_m2_aggregation final : public groupby_aggregation { void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } }; +/** + * @brief Derived aggregation class for specifying MERGE_HISTOGRAM aggregation + */ +class merge_histogram_aggregation final : public groupby_aggregation, public reduce_aggregation { + public: + explicit merge_histogram_aggregation() : aggregation{MERGE_HISTOGRAM} {} + + [[nodiscard]] std::unique_ptr clone() const override + { + return std::make_unique(*this); + } + std::vector> get_simple_aggregations( + data_type col_type, simple_aggregations_collector& collector) const override + { + return collector.visit(col_type, *this); + } + void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); } +}; + /** * @brief Derived aggregation class for specifying COVARIANCE aggregation */ diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp index 2e6a643484e..b3f2a774a60 100644 --- a/cpp/src/aggregation/aggregation.cpp +++ b/cpp/src/aggregation/aggregation.cpp @@ -64,6 +64,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, histogram_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, any_aggregation const& agg) { @@ -196,6 +202,12 @@ std::vector> simple_aggregations_collector::visit( return visit(col_type, static_cast(agg)); } +std::vector> simple_aggregations_collector::visit( + data_type col_type, merge_histogram_aggregation const& agg) +{ + return visit(col_type, static_cast(agg)); +} + std::vector> simple_aggregations_collector::visit( data_type col_type, covariance_aggregation const& agg) { @@ -246,6 +258,10 @@ void aggregation_finalizer::visit(count_aggregation const& agg) { visit(static_cast(agg)); } +void aggregation_finalizer::visit(histogram_aggregation const& agg) +{ + visit(static_cast(agg)); +} void aggregation_finalizer::visit(any_aggregation const& agg) { @@ -357,6 +373,11 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg) visit(static_cast(agg)); } +void aggregation_finalizer::visit(merge_histogram_aggregation const& agg) +{ + visit(static_cast(agg)); +} + void aggregation_finalizer::visit(covariance_aggregation const& agg) { visit(static_cast(agg)); @@ -460,6 +481,16 @@ template std::unique_ptr make_count_aggregation make_count_aggregation( null_policy null_handling); +/// Factory to create a HISTOGRAM aggregation +template +std::unique_ptr make_histogram_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_histogram_aggregation(); +template std::unique_ptr make_histogram_aggregation(); +template std::unique_ptr make_histogram_aggregation(); + /// Factory to create a ANY aggregation template std::unique_ptr make_any_aggregation() @@ -764,6 +795,17 @@ std::unique_ptr make_merge_m2_aggregation() template std::unique_ptr make_merge_m2_aggregation(); template std::unique_ptr make_merge_m2_aggregation(); +/// Factory to create a MERGE_HISTOGRAM aggregation +template +std::unique_ptr make_merge_histogram_aggregation() +{ + return std::make_unique(); +} +template std::unique_ptr make_merge_histogram_aggregation(); +template std::unique_ptr +make_merge_histogram_aggregation(); +template std::unique_ptr make_merge_histogram_aggregation(); + /// Factory to create a COVARIANCE aggregation template std::unique_ptr make_covariance_aggregation(size_type min_periods, size_type ddof) From 1b04436990377028c8f78bbd25742bc02808d757 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 7 Sep 2023 10:33:12 -0700 Subject: [PATCH 005/100] Add new files --- cpp/CMakeLists.txt | 2 ++ cpp/src/groupby/sort/group_histogram.cu | 19 ++++++++++++++ cpp/src/reductions/histogram.cu | 34 +++++++++++++++++++++++++ cpp/src/reductions/histogram.cuh | 23 +++++++++++++++++ 4 files changed, 78 insertions(+) create mode 100644 cpp/src/groupby/sort/group_histogram.cu create mode 100644 cpp/src/reductions/histogram.cu create mode 100644 cpp/src/reductions/histogram.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 516865e5782..a8e45b70572 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -323,6 +323,7 @@ add_library( src/groupby/sort/group_collect.cu src/groupby/sort/group_correlation.cu src/groupby/sort/group_count.cu + src/groupby/sort/group_histogram.cu src/groupby/sort/group_m2.cu src/groupby/sort/group_max.cu src/groupby/sort/group_min.cu @@ -469,6 +470,7 @@ add_library( src/reductions/all.cu src/reductions/any.cu src/reductions/collect_ops.cu + src/reductions/histogram.cu src/reductions/max.cu src/reductions/mean.cu src/reductions/min.cu diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu new file mode 100644 index 00000000000..9eb09738ac4 --- /dev/null +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +namespace cudf::groupby::detail { +} // namespace cudf::groupby::detail diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu new file mode 100644 index 00000000000..5bfed5965f3 --- /dev/null +++ b/cpp/src/reductions/histogram.cu @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + + +#include +#include + +namespace cudf::reduction::detail { + +std::unique_ptr histogram(column_view const& col, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return nullptr; +} + +} // namespace cudf::reduction::detail diff --git a/cpp/src/reductions/histogram.cuh b/cpp/src/reductions/histogram.cuh new file mode 100644 index 00000000000..5951b91a964 --- /dev/null +++ b/cpp/src/reductions/histogram.cuh @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cudf::reduction::detail { + +} // namespace cudf::reduction::detail From 1977d696a523c6cd4eb26a24b50a1aca0ad83099 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Sep 2023 09:17:31 -0700 Subject: [PATCH 006/100] Add skeleton APIs --- .../reduction/detail/reduction_functions.hpp | 22 +++++ cpp/src/groupby/sort/aggregate.cpp | 24 +++++ cpp/src/groupby/sort/group_histogram.cu | 98 +++++++++++++++++++ cpp/src/groupby/sort/group_reductions.hpp | 36 +++++++ cpp/src/reductions/histogram.cu | 14 ++- cpp/src/reductions/reductions.cpp | 3 + 6 files changed, 193 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 014a6ba70eb..34c1720aba8 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -131,6 +131,28 @@ std::unique_ptr all(column_view const& col, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief + * + * If all elements in input column are null, output scalar is null. + */ +std::unique_ptr histogram(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief + * + * If all elements in input column are null, output scalar is null. + */ +std::unique_ptr merge_histogram(column_view const& col, + data_type const output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Computes product of elements in input column * diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 3f977dc81d7..f59f2ab0271 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -89,6 +89,18 @@ void aggregate_result_functor::operator()(aggregation co detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) return; + + cache.add_result( + values, + agg, + detail::group_histogram( + get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr)); +} + template <> void aggregate_result_functor::operator()(aggregation const& agg) { @@ -534,6 +546,18 @@ void aggregate_result_functor::operator()(aggregation con get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } +template <> +void aggregate_result_functor::operator()(aggregation const& agg) +{ + if (cache.has_result(values, agg)) { return; } + + cache.add_result( + values, + agg, + detail::group_merge_histogram( + get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); +} + /** * @brief Creates column views with only valid elements in both input column views * diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 9eb09738ac4..5123a9fb500 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -14,6 +14,104 @@ * limitations under the License. */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include namespace cudf::groupby::detail { +std::unique_ptr group_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); + CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), + "Size of values column should be same as that of group labels"); + + auto result = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + + if (num_groups == 0) { return result; } + + if (values.nullable()) { + auto values_view = column_device_view::create(values, stream); + + // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) + // so we need to transform it to cast it to an integer type + auto bitmask_iterator = + thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), + [] __device__(auto b) { return static_cast(b); }); + + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + bitmask_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } else { + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } + + return result; +} + +std::unique_ptr group_merge_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); + CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), + "Size of values column should be same as that of group labels"); + + auto result = make_numeric_column( + data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); + + if (num_groups == 0) { return result; } + + if (values.nullable()) { + auto values_view = column_device_view::create(values, stream); + + // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) + // so we need to transform it to cast it to an integer type + auto bitmask_iterator = + thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), + [] __device__(auto b) { return static_cast(b); }); + + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + bitmask_iterator, + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } else { + thrust::reduce_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + thrust::make_constant_iterator(1), + thrust::make_discard_iterator(), + result->mutable_view().begin()); + } + + return result; +} + } // namespace cudf::groupby::detail diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index fc24b679db5..8acf046324b 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -216,6 +216,23 @@ std::unique_ptr group_count_all(cudf::device_span group size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief + * + * @code{.pseudo} + * @endcode + * + * @param values Grouped values to get valid count of + * @param group_labels ID of group that the corresponding value belongs to + * @param num_groups Number of groups ( unique values in @p group_labels ) + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr group_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Internal API to calculate sum of squares of differences from means. @@ -441,6 +458,25 @@ std::unique_ptr group_merge_m2(column_view const& values, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); + +/** + * @brief + * + * @code{.pseudo} + * @endcode + * + * @param values Grouped values to get valid count of + * @param group_labels ID of group that the corresponding value belongs to + * @param num_groups Number of groups ( unique values in @p group_labels ) + * @param stream CUDA stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr group_merge_histogram(column_view const& values, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + /** * @brief Internal API to find covariance of child columns of a non-nullable struct column. * diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 5bfed5965f3..24e9624cc31 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -18,15 +18,21 @@ #include - #include #include namespace cudf::reduction::detail { -std::unique_ptr histogram(column_view const& col, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return nullptr; +} + +std::unique_ptr merge_histogram(column_view const& input, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { return nullptr; } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 2fef8aa8785..d6793d85ea6 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -59,6 +59,9 @@ struct reduce_dispatch_functor { case aggregation::MAX: return max(col, output_dtype, init, stream, mr); case aggregation::ANY: return any(col, output_dtype, init, stream, mr); case aggregation::ALL: return all(col, output_dtype, init, stream, mr); + case aggregation::HISTOGRAM: return histogram(col, output_dtype, init, stream, mr); + case aggregation::MERGE_HISTOGRAM: + return merge_histogram(col, output_dtype, init, stream, mr); case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr); case aggregation::MEAN: return mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { From 6fa93fcdff5e9d1fb1bb39beef9dbb47b8aff4aa Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Sep 2023 12:00:52 -0700 Subject: [PATCH 007/100] Extract hash_reduce_by_row --- cpp/CMakeLists.txt | 1 + .../reduction/detail/reduction_functions.hpp | 3 - cpp/src/reductions/hash_reduce_by_row.cu | 86 +++++++++++++ cpp/src/reductions/hash_reduce_by_row.cuh | 116 ++++++++++++++++++ cpp/src/reductions/histogram.cu | 14 +++ cpp/src/reductions/reductions.cpp | 5 +- cpp/src/stream_compaction/distinct.cu | 20 +-- cpp/src/stream_compaction/distinct_reduce.cu | 59 +++------ cpp/src/stream_compaction/distinct_reduce.cuh | 2 +- 9 files changed, 247 insertions(+), 59 deletions(-) create mode 100644 cpp/src/reductions/hash_reduce_by_row.cu create mode 100644 cpp/src/reductions/hash_reduce_by_row.cuh diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a8e45b70572..a8c107e740f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -471,6 +471,7 @@ add_library( src/reductions/any.cu src/reductions/collect_ops.cu src/reductions/histogram.cu + src/reductions/hash_reduce_by_row.cu src/reductions/max.cu src/reductions/mean.cu src/reductions/min.cu diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 34c1720aba8..804b79593da 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -138,7 +138,6 @@ std::unique_ptr all(column_view const& col, */ std::unique_ptr histogram(column_view const& col, data_type const output_dtype, - std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); @@ -148,8 +147,6 @@ std::unique_ptr histogram(column_view const& col, * If all elements in input column are null, output scalar is null. */ std::unique_ptr merge_histogram(column_view const& col, - data_type const output_dtype, - std::optional> init, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/src/reductions/hash_reduce_by_row.cu b/cpp/src/reductions/hash_reduce_by_row.cu new file mode 100644 index 00000000000..b93a35d058a --- /dev/null +++ b/cpp/src/reductions/hash_reduce_by_row.cu @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "hash_reduce_by_row.cuh" + +#include +#include +#include + +namespace cudf::detail { + +#if 0 +rmm::device_uvector hash_reduce_by_row( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, + "This function should not be called with KEEP_ANY"); + + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + + thrust::uninitialized_fill(rmm::exec_policy(stream), + reduction_results.begin(), + reduction_results.end(), + reduction_init_value(keep)); + + auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const reduce_by_row = [&](auto const value_comp) { + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + reduce_by_row_fn{ + map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); + } + }; + + if (nans_equal == nan_equality::ALL_EQUAL) { + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + reduce_by_row(nan_equal_comparator{}); + } else { + using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; + reduce_by_row(nan_unequal_comparator{}); + } + + return reduction_results; +} +#endif + +} // namespace cudf::detail diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh new file mode 100644 index 00000000000..b69846c807d --- /dev/null +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf::detail { + +/** + * @brief Perform a reduction on groups of rows that are compared equal. + * + * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared + * equal. A hash table is used to find groups of equal rows. + * + * Depending on the `keep` parameter, the reduction operation for each row group is: + * - If `keep == KEEP_FIRST`: min of row indices in the group. + * - If `keep == KEEP_LAST`: max of row indices in the group. + * - If `keep == KEEP_NONE`: count of equivalent rows (group size). + * + * At the beginning of the operation, the entire output array is filled with a value given by + * the `reduction_init_value()` function. Then, the reduction result for each row group is written + * into the output array at the index of an unspecified row in the group. + * + * @param map The auxiliary map to perform reduction + * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row + * comparisons + * @param num_rows The number of all input rows + * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns + * @param keep The parameter to determine what type of reduction to perform + * @param nulls_equal Flag to specify whether null elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vector + * @return A device_uvector containing the reduction results + */ +rmm::device_uvector hash_reduce_by_row( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief A functor to perform reduce-by-key with keys are rows that compared equal. + * + * TODO: We need to switch to use `static_reduction_map` when it is ready + * (https://github.com/NVIDIA/cuCollections/pull/98). + */ +template +struct reduce_by_row_fn { + MapView const d_map; + KeyHasher const d_hasher; + KeyEqual const d_equal; + OutputType* const d_output; + + reduce_by_row_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + OutputType* const d_output) + : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, d_output{d_output} + { + } + + protected: + __device__ OutputType* get_output_ptr(size_type const idx) const + { + auto const iter = d_map.find(idx, d_hasher, d_equal); + + if (iter != d_map.end()) { + // Only one index value of the duplicate rows could be inserted into the map. + // As such, looking up for all indices of duplicate rows always returns the same value. + auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); + + // All duplicate rows will have concurrent access to this same output slot. + return &d_output[inserted_idx]; + } else { + // All input `idx` values have been inserted into the map before. + // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if + // `d_equal(idx, idx) == false`. + // Such situations are due to comparing nulls or NaNs which are considered as always unequal. + // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct + // output slot. + return &d_output[idx]; + } + } +}; + +} // namespace cudf::detail diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 24e9624cc31..053ad62180b 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -21,12 +21,20 @@ #include #include +#include + namespace cudf::reduction::detail { std::unique_ptr histogram(column_view const& input, + data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(cudf::is_integral(output_dtype), + "The output type of histogram aggregation must be an integral type."); + + + return nullptr; } @@ -34,6 +42,12 @@ std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS( + input.type().id() == type_id::STRUCT && input.num_children() == 2, + "The input of merge_histogram aggregation must be a struct column having two children."); + CUDF_EXPECTS(cudf::is_integral(input.child(1).type()), + "The second child of the input column must be an integer type."); + return nullptr; } diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index d6793d85ea6..8d19413190b 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -59,9 +59,8 @@ struct reduce_dispatch_functor { case aggregation::MAX: return max(col, output_dtype, init, stream, mr); case aggregation::ANY: return any(col, output_dtype, init, stream, mr); case aggregation::ALL: return all(col, output_dtype, init, stream, mr); - case aggregation::HISTOGRAM: return histogram(col, output_dtype, init, stream, mr); - case aggregation::MERGE_HISTOGRAM: - return merge_histogram(col, output_dtype, init, stream, mr); + case aggregation::HISTOGRAM: return histogram(col, output_dtype, stream, mr); + case aggregation::MERGE_HISTOGRAM: return merge_histogram(col, stream, mr); case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr); case aggregation::MEAN: return mean(col, output_dtype, stream, mr); case aggregation::VARIANCE: { diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index cc60b2a12ea..8b0710372a6 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -96,16 +96,16 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = hash_reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + auto const reduction_results = distinct_reduce(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 020e6a495bc..7562a174ebb 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -16,6 +16,9 @@ #include "distinct_reduce.cuh" +#include + + #include #include #include @@ -24,31 +27,26 @@ namespace cudf::detail { namespace { /** - * @brief A functor to perform reduce-by-key with keys are rows that compared equal. + * @brief * - * TODO: We need to switch to use `static_reduction_map` when it is ready - * (https://github.com/NVIDIA/cuCollections/pull/98). */ template -struct reduce_by_row_fn { - MapView const d_map; - KeyHasher const d_hasher; - KeyEqual const d_equal; +struct distinct_reduce_fn : reduce_by_row_fn { duplicate_keep_option const keep; - size_type* const d_output; - - reduce_by_row_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, keep{keep}, d_output{d_output} + + distinct_reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) + : reduce_by_row_fn(d_map, d_hasher, d_equal, d_output), + keep{keep} { } __device__ void operator()(size_type const idx) const { - auto const out_ptr = get_output_ptr(idx); + auto const out_ptr = this->get_output_ptr(idx); if (keep == duplicate_keep_option::KEEP_FIRST) { // Store the smallest index of all rows that are equal. @@ -61,34 +59,11 @@ struct reduce_by_row_fn { atomicAdd(out_ptr, size_type{1}); } } - - private: - __device__ size_type* get_output_ptr(size_type const idx) const - { - auto const iter = d_map.find(idx, d_hasher, d_equal); - - if (iter != d_map.end()) { - // Only one index value of the duplicate rows could be inserted into the map. - // As such, looking up for all indices of duplicate rows always returns the same value. - auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); - - // All duplicate rows will have concurrent access to this same output slot. - return &d_output[inserted_idx]; - } else { - // All input `idx` values have been inserted into the map before. - // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if - // `d_equal(idx, idx) == false`. - // Such situations are due to comparing nulls or NaNs which are considered as always unequal. - // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct - // output slot. - return &d_output[idx]; - } - } }; } // namespace -rmm::device_uvector hash_reduce_by_row( +rmm::device_uvector distinct_reduce( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, @@ -122,7 +97,7 @@ rmm::device_uvector hash_reduce_by_row( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ + distinct_reduce_fn{ map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); } else { auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); @@ -130,7 +105,7 @@ rmm::device_uvector hash_reduce_by_row( rmm::exec_policy(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ + distinct_reduce_fn{ map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); } }; diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index 8ec1fa18205..74fba8196f4 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -72,7 +72,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results */ -rmm::device_uvector hash_reduce_by_row( +rmm::device_uvector distinct_reduce( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, From d11dd7f195e5cc71c3ad8b2d6ba780f039e3a48c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 11 Sep 2023 22:28:39 -0700 Subject: [PATCH 008/100] Adopt `hash_reduce_by_row` in `distinct_reduce` --- cpp/CMakeLists.txt | 1 - cpp/src/reductions/hash_reduce_by_row.cu | 86 -------------------- cpp/src/reductions/hash_reduce_by_row.cuh | 55 +++++++++++++ cpp/src/stream_compaction/distinct_reduce.cu | 78 ++++++++---------- 4 files changed, 89 insertions(+), 131 deletions(-) delete mode 100644 cpp/src/reductions/hash_reduce_by_row.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a8c107e740f..a8e45b70572 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -471,7 +471,6 @@ add_library( src/reductions/any.cu src/reductions/collect_ops.cu src/reductions/histogram.cu - src/reductions/hash_reduce_by_row.cu src/reductions/max.cu src/reductions/mean.cu src/reductions/min.cu diff --git a/cpp/src/reductions/hash_reduce_by_row.cu b/cpp/src/reductions/hash_reduce_by_row.cu deleted file mode 100644 index b93a35d058a..00000000000 --- a/cpp/src/reductions/hash_reduce_by_row.cu +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "hash_reduce_by_row.cuh" - -#include -#include -#include - -namespace cudf::detail { - -#if 0 -rmm::device_uvector hash_reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - - thrust::uninitialized_fill(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - reduction_init_value(keep)); - - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); - - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto const reduce_by_row = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } - }; - - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - reduce_by_row(nan_equal_comparator{}); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - reduce_by_row(nan_unequal_comparator{}); - } - - return reduction_results; -} -#endif - -} // namespace cudf::detail diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh index b69846c807d..c64f65f30b7 100644 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -113,4 +113,59 @@ struct reduce_by_row_fn { } }; +template +rmm::device_uvector hash_reduce_by_row( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + null_equality nulls_equal, + nan_equality nans_equal, + ReduceFuncBuilder func_builder, + OutputType init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + + thrust::uninitialized_fill( + rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); + + auto const map_dview = map.get_device_view(); + auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const reduce_by_row = [&](auto const value_comp) { + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); + } + }; + + if (nans_equal == nan_equality::ALL_EQUAL) { + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + reduce_by_row(nan_equal_comparator{}); + } else { + using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; + reduce_by_row(nan_unequal_comparator{}); + } + + return reduction_results; +} + } // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 7562a174ebb..8b51ccc4026 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -18,7 +18,6 @@ #include - #include #include #include @@ -61,6 +60,19 @@ struct distinct_reduce_fn : reduce_by_row_fn +struct reduce_func_builder { + template + static auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) + { + return distinct_reduce_fn{ + d_map, d_hasher, d_equal, keep, d_output}; + } +}; + } // namespace rmm::device_uvector distinct_reduce( @@ -75,51 +87,29 @@ rmm::device_uvector distinct_reduce( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - - thrust::uninitialized_fill(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - reduction_init_value(keep)); - - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); - - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto const reduce_by_row = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - distinct_reduce_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - distinct_reduce_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } + auto const hash_reduce = [&](auto const& func_builder) { + return hash_reduce_by_row(map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + func_builder, + reduction_init_value(keep), + stream, + mr); }; - - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - reduce_by_row(nan_equal_comparator{}); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - reduce_by_row(nan_unequal_comparator{}); + switch (keep) { + case duplicate_keep_option::KEEP_FIRST: + return hash_reduce(reduce_func_builder{}); + case duplicate_keep_option::KEEP_LAST: + return hash_reduce(reduce_func_builder{}); + case duplicate_keep_option::KEEP_NONE: + return hash_reduce(reduce_func_builder{}); + default: // KEEP_ANY + CUDF_FAIL("This function should not be called with KEEP_ANY"); } - - return reduction_results; } } // namespace cudf::detail From e58f3e33224e4dcd59707c100e0d976fee2fce9e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 09:55:22 -0700 Subject: [PATCH 009/100] Rename struct and simplify code --- cpp/src/reductions/hash_reduce_by_row.cuh | 10 ++-- cpp/src/stream_compaction/distinct_reduce.cu | 53 +++++++++----------- 2 files changed, 28 insertions(+), 35 deletions(-) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh index c64f65f30b7..2566cee6c7f 100644 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -75,16 +75,16 @@ rmm::device_uvector hash_reduce_by_row( * (https://github.com/NVIDIA/cuCollections/pull/98). */ template -struct reduce_by_row_fn { +struct reduce_by_row_fn_base { MapView const d_map; KeyHasher const d_hasher; KeyEqual const d_equal; OutputType* const d_output; - reduce_by_row_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - OutputType* const d_output) + reduce_by_row_fn_base(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + OutputType* const d_output) : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, d_output{d_output} { } diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 8b51ccc4026..0b621f87fbf 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -30,7 +30,7 @@ namespace { * */ template -struct distinct_reduce_fn : reduce_by_row_fn { +struct distinct_reduce_fn : reduce_by_row_fn_base { duplicate_keep_option const keep; distinct_reduce_fn(MapView const& d_map, @@ -38,7 +38,8 @@ struct distinct_reduce_fn : reduce_by_row_fn(d_map, d_hasher, d_equal, d_output), + : reduce_by_row_fn_base( + d_map, d_hasher, d_equal, d_output), keep{keep} { } @@ -60,13 +61,14 @@ struct distinct_reduce_fn : reduce_by_row_fn struct reduce_func_builder { + duplicate_keep_option keep; + template - static auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) { return distinct_reduce_fn{ d_map, d_hasher, d_equal, keep, d_output}; @@ -87,29 +89,20 @@ rmm::device_uvector distinct_reduce( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto const hash_reduce = [&](auto const& func_builder) { - return hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - func_builder, - reduction_init_value(keep), - stream, - mr); - }; - switch (keep) { - case duplicate_keep_option::KEEP_FIRST: - return hash_reduce(reduce_func_builder{}); - case duplicate_keep_option::KEEP_LAST: - return hash_reduce(reduce_func_builder{}); - case duplicate_keep_option::KEEP_NONE: - return hash_reduce(reduce_func_builder{}); - default: // KEEP_ANY - CUDF_FAIL("This function should not be called with KEEP_ANY"); - } + CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, + "This function should not be called with KEEP_ANY"); + + return hash_reduce_by_row(map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + reduce_func_builder{keep}, + reduction_init_value(keep), + stream, + mr); } } // namespace cudf::detail From 3cf194824e64952bac314762a35d0746ce5c4e68 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 10:04:52 -0700 Subject: [PATCH 010/100] Refactor `hash_reduce_by_row` --- cpp/src/reductions/hash_reduce_by_row.cuh | 171 ++++++++++++++++++ cpp/src/stream_compaction/distinct.cu | 20 +- cpp/src/stream_compaction/distinct_reduce.cu | 114 ++++-------- cpp/src/stream_compaction/distinct_reduce.cuh | 2 +- 4 files changed, 218 insertions(+), 89 deletions(-) create mode 100644 cpp/src/reductions/hash_reduce_by_row.cuh diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh new file mode 100644 index 00000000000..2566cee6c7f --- /dev/null +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +namespace cudf::detail { + +/** + * @brief Perform a reduction on groups of rows that are compared equal. + * + * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared + * equal. A hash table is used to find groups of equal rows. + * + * Depending on the `keep` parameter, the reduction operation for each row group is: + * - If `keep == KEEP_FIRST`: min of row indices in the group. + * - If `keep == KEEP_LAST`: max of row indices in the group. + * - If `keep == KEEP_NONE`: count of equivalent rows (group size). + * + * At the beginning of the operation, the entire output array is filled with a value given by + * the `reduction_init_value()` function. Then, the reduction result for each row group is written + * into the output array at the index of an unspecified row in the group. + * + * @param map The auxiliary map to perform reduction + * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row + * comparisons + * @param num_rows The number of all input rows + * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns + * @param keep The parameter to determine what type of reduction to perform + * @param nulls_equal Flag to specify whether null elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vector + * @return A device_uvector containing the reduction results + */ +rmm::device_uvector hash_reduce_by_row( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +/** + * @brief A functor to perform reduce-by-key with keys are rows that compared equal. + * + * TODO: We need to switch to use `static_reduction_map` when it is ready + * (https://github.com/NVIDIA/cuCollections/pull/98). + */ +template +struct reduce_by_row_fn_base { + MapView const d_map; + KeyHasher const d_hasher; + KeyEqual const d_equal; + OutputType* const d_output; + + reduce_by_row_fn_base(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + OutputType* const d_output) + : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, d_output{d_output} + { + } + + protected: + __device__ OutputType* get_output_ptr(size_type const idx) const + { + auto const iter = d_map.find(idx, d_hasher, d_equal); + + if (iter != d_map.end()) { + // Only one index value of the duplicate rows could be inserted into the map. + // As such, looking up for all indices of duplicate rows always returns the same value. + auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); + + // All duplicate rows will have concurrent access to this same output slot. + return &d_output[inserted_idx]; + } else { + // All input `idx` values have been inserted into the map before. + // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if + // `d_equal(idx, idx) == false`. + // Such situations are due to comparing nulls or NaNs which are considered as always unequal. + // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct + // output slot. + return &d_output[idx]; + } + } +}; + +template +rmm::device_uvector hash_reduce_by_row( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + null_equality nulls_equal, + nan_equality nans_equal, + ReduceFuncBuilder func_builder, + OutputType init, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + + thrust::uninitialized_fill( + rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); + + auto const map_dview = map.get_device_view(); + auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const reduce_by_row = [&](auto const value_comp) { + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); + thrust::for_each( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); + } + }; + + if (nans_equal == nan_equality::ALL_EQUAL) { + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + reduce_by_row(nan_equal_comparator{}); + } else { + using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; + reduce_by_row(nan_unequal_comparator{}); + } + + return reduction_results; +} + +} // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index cc60b2a12ea..8b0710372a6 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -96,16 +96,16 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = hash_reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + auto const reduction_results = distinct_reduce(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 020e6a495bc..0b621f87fbf 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -16,6 +16,8 @@ #include "distinct_reduce.cuh" +#include + #include #include #include @@ -24,31 +26,27 @@ namespace cudf::detail { namespace { /** - * @brief A functor to perform reduce-by-key with keys are rows that compared equal. + * @brief * - * TODO: We need to switch to use `static_reduction_map` when it is ready - * (https://github.com/NVIDIA/cuCollections/pull/98). */ template -struct reduce_by_row_fn { - MapView const d_map; - KeyHasher const d_hasher; - KeyEqual const d_equal; +struct distinct_reduce_fn : reduce_by_row_fn_base { duplicate_keep_option const keep; - size_type* const d_output; - reduce_by_row_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, keep{keep}, d_output{d_output} + distinct_reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) + : reduce_by_row_fn_base( + d_map, d_hasher, d_equal, d_output), + keep{keep} { } __device__ void operator()(size_type const idx) const { - auto const out_ptr = get_output_ptr(idx); + auto const out_ptr = this->get_output_ptr(idx); if (keep == duplicate_keep_option::KEEP_FIRST) { // Store the smallest index of all rows that are equal. @@ -61,34 +59,25 @@ struct reduce_by_row_fn { atomicAdd(out_ptr, size_type{1}); } } +}; - private: - __device__ size_type* get_output_ptr(size_type const idx) const - { - auto const iter = d_map.find(idx, d_hasher, d_equal); - - if (iter != d_map.end()) { - // Only one index value of the duplicate rows could be inserted into the map. - // As such, looking up for all indices of duplicate rows always returns the same value. - auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); +struct reduce_func_builder { + duplicate_keep_option keep; - // All duplicate rows will have concurrent access to this same output slot. - return &d_output[inserted_idx]; - } else { - // All input `idx` values have been inserted into the map before. - // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if - // `d_equal(idx, idx) == false`. - // Such situations are due to comparing nulls or NaNs which are considered as always unequal. - // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct - // output slot. - return &d_output[idx]; - } + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) + { + return distinct_reduce_fn{ + d_map, d_hasher, d_equal, keep, d_output}; } }; } // namespace -rmm::device_uvector hash_reduce_by_row( +rmm::device_uvector distinct_reduce( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, @@ -103,48 +92,17 @@ rmm::device_uvector hash_reduce_by_row( CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, "This function should not be called with KEEP_ANY"); - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - - thrust::uninitialized_fill(rmm::exec_policy(stream), - reduction_results.begin(), - reduction_results.end(), - reduction_init_value(keep)); - - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); - - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto const reduce_by_row = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - reduce_by_row_fn{ - map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); - } - }; - - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - reduce_by_row(nan_equal_comparator{}); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - reduce_by_row(nan_unequal_comparator{}); - } - - return reduction_results; + return hash_reduce_by_row(map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + reduce_func_builder{keep}, + reduction_init_value(keep), + stream, + mr); } } // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.cuh index 8ec1fa18205..74fba8196f4 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.cuh @@ -72,7 +72,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results */ -rmm::device_uvector hash_reduce_by_row( +rmm::device_uvector distinct_reduce( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, From 84886467639b5303572f2e85402b3db075e36cbd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 10:54:46 -0700 Subject: [PATCH 011/100] Rewrite `hash_reduce_by_row.cuh` --- cpp/src/reductions/hash_reduce_by_row.cuh | 91 ++++++++++------------- 1 file changed, 40 insertions(+), 51 deletions(-) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh index 2566cee6c7f..d30e96bc9d2 100644 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -16,8 +16,6 @@ #include -#include -#include #include #include @@ -25,57 +23,22 @@ #include #include -#include +#include +#include +#include namespace cudf::detail { /** - * @brief Perform a reduction on groups of rows that are compared equal. - * - * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. - * - * Depending on the `keep` parameter, the reduction operation for each row group is: - * - If `keep == KEEP_FIRST`: min of row indices in the group. - * - If `keep == KEEP_LAST`: max of row indices in the group. - * - If `keep == KEEP_NONE`: count of equivalent rows (group size). - * - * At the beginning of the operation, the entire output array is filled with a value given by - * the `reduction_init_value()` function. Then, the reduction result for each row group is written - * into the output array at the index of an unspecified row in the group. - * - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons - * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns - * @param keep The parameter to determine what type of reduction to perform - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results - */ -rmm::device_uvector hash_reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - -/** - * @brief A functor to perform reduce-by-key with keys are rows that compared equal. + * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are + * rows that compared equal. * * TODO: We need to switch to use `static_reduction_map` when it is ready * (https://github.com/NVIDIA/cuCollections/pull/98). */ template struct reduce_by_row_fn_base { + protected: MapView const d_map; KeyHasher const d_hasher; KeyEqual const d_equal; @@ -89,13 +52,18 @@ struct reduce_by_row_fn_base { { } - protected: + /** + * @brief Return a pointer to the output array at the given index. + * + * @param idx The access index + * @return A pointer to the given index in the output array + */ __device__ OutputType* get_output_ptr(size_type const idx) const { auto const iter = d_map.find(idx, d_hasher, d_equal); if (iter != d_map.end()) { - // Only one index value of the duplicate rows could be inserted into the map. + // Only one (undetermined) index value of the duplicate rows could be inserted into the map. // As such, looking up for all indices of duplicate rows always returns the same value. auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); @@ -113,6 +81,29 @@ struct reduce_by_row_fn_base { } }; +/** + * @brief Perform a reduction on groups of rows that are compared equal. + * + * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared + * equal. A hash table is used to find groups of equal rows. + * + * At the beginning of the operation, the entire output array is filled with a value given by + * the `init` parameter. Then, the reduction result for each row group is written into the output + * array at the index of an unspecified row in the group. + * + * @param map The auxiliary map to perform reduction + * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row + * comparisons + * @param num_rows The number of all input rows + * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns + * @param nulls_equal Flag to specify whether null elements should be considered as equal + * @param nans_equal Flag to specify whether NaN values in floating point column should be + * considered equal. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vector + * @return A device_uvector containing the reduction results + */ template rmm::device_uvector hash_reduce_by_row( hash_map_type const& map, @@ -127,16 +118,14 @@ rmm::device_uvector hash_reduce_by_row( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - - thrust::uninitialized_fill( - rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); - auto const map_dview = map.get_device_view(); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + auto reduction_results = rmm::device_uvector(num_rows, stream, mr); + thrust::uninitialized_fill( + rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); auto const reduce_by_row = [&](auto const value_comp) { if (has_nested_columns) { From 1994684b77c43fc81dd33e4b564a87b7c3b84a9c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 10:55:57 -0700 Subject: [PATCH 012/100] Rename and rewrite `distinct_reduce.hpp` --- .../{distinct_reduce.cuh => distinct_reduce.hpp} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename cpp/src/stream_compaction/{distinct_reduce.cuh => distinct_reduce.hpp} (93%) diff --git a/cpp/src/stream_compaction/distinct_reduce.cuh b/cpp/src/stream_compaction/distinct_reduce.hpp similarity index 93% rename from cpp/src/stream_compaction/distinct_reduce.cuh rename to cpp/src/stream_compaction/distinct_reduce.hpp index 74fba8196f4..236b6c860c3 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cuh +++ b/cpp/src/stream_compaction/distinct_reduce.hpp @@ -14,18 +14,14 @@ * limitations under the License. */ -#include "stream_compaction_common.cuh" +#include "stream_compaction_common.hpp" -#include #include #include #include #include #include -#include - -#include namespace cudf::detail { @@ -56,6 +52,8 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * - If `keep == KEEP_LAST`: max of row indices in the group. * - If `keep == KEEP_NONE`: count of equivalent rows (group size). * + * Note that this function is not needed when `keep == KEEP_NONE`. + * * At the beginning of the operation, the entire output array is filled with a value given by * the `reduction_init_value()` function. Then, the reduction result for each row group is written * into the output array at the index of an unspecified row in the group. @@ -68,6 +66,8 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * @param has_nested_columns Indicates whether the input table has any nested columns * @param keep The parameter to determine what type of reduction to perform * @param nulls_equal Flag to specify whether null elements should be considered as equal + * @param nans_equal Flag to specify whether NaN values in floating point column should be + * considered equal. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results From 5dcbac9dec96e4525369029bb23774020f9b5c1e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 11:00:16 -0700 Subject: [PATCH 013/100] Rewrite `distinct.cu` --- cpp/src/stream_compaction/distinct.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 8b0710372a6..b551df96765 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -14,7 +14,8 @@ * limitations under the License. */ -#include "distinct_reduce.cuh" +#include "distinct_reduce.hpp" +#include "stream_compaction_common.cuh" #include #include From 6236fcc9551685b8da497c0b86eda08769615f61 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 11:10:14 -0700 Subject: [PATCH 014/100] Rewrite `distinct_reduce.cu` --- cpp/src/stream_compaction/distinct_reduce.cu | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 0b621f87fbf..24926cdbd4a 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -14,20 +14,15 @@ * limitations under the License. */ -#include "distinct_reduce.cuh" +#include "distinct_reduce.hpp" #include -#include -#include -#include - namespace cudf::detail { namespace { /** - * @brief - * + * @brief The functor to find the first/last/none duplicate row for rows that compared equal. */ template struct distinct_reduce_fn : reduce_by_row_fn_base { @@ -61,6 +56,10 @@ struct distinct_reduce_fn : reduce_by_row_fn_base Date: Tue, 12 Sep 2023 11:10:23 -0700 Subject: [PATCH 015/100] Rewrite `hash_reduce_by_row.cuh` --- cpp/src/reductions/hash_reduce_by_row.cuh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh index d30e96bc9d2..1cff009b17b 100644 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -91,6 +91,9 @@ struct reduce_by_row_fn_base { * the `init` parameter. Then, the reduction result for each row group is written into the output * array at the index of an unspecified row in the group. * + * @tparam ReduceFuncBuilder The builder class that must have a `build()` method returning a + * reduction functor derived from `reduce_by_row_fn_base` + * @tparam OutputType Type of the reduction results * @param map The auxiliary map to perform reduction * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row * comparisons @@ -100,6 +103,7 @@ struct reduce_by_row_fn_base { * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN values in floating point column should be * considered equal. + * @param init The initial value for reduction of each row group * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results From 584ff8dc600a6c6d13f5f5adadae719c0aa7eb2f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 11:22:06 -0700 Subject: [PATCH 016/100] Minor changes --- cpp/src/stream_compaction/distinct_reduce.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 24926cdbd4a..a451643794d 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -33,8 +33,8 @@ struct distinct_reduce_fn : reduce_by_row_fn_base( - d_map, d_hasher, d_equal, d_output), + : reduce_by_row_fn_base{ + d_map, d_hasher, d_equal, d_output}, keep{keep} { } @@ -61,7 +61,7 @@ struct distinct_reduce_fn : reduce_by_row_fn_base auto build(MapView const& d_map, From 4a3d60d62598c6180431b99f3ab03c3787fd445f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 11:28:51 -0700 Subject: [PATCH 017/100] Fix style --- cpp/src/stream_compaction/distinct_reduce.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index a451643794d..8cfb7b93515 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -33,8 +33,10 @@ struct distinct_reduce_fn : reduce_by_row_fn_base{ - d_map, d_hasher, d_equal, d_output}, + : reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, keep{keep} { } From 34cb488c27880f40a579686ebd18d447f734f240 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 11:32:09 -0700 Subject: [PATCH 018/100] Fix comment --- cpp/src/stream_compaction/distinct_reduce.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu index 8cfb7b93515..64d29ae2ff0 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -22,7 +22,7 @@ namespace cudf::detail { namespace { /** - * @brief The functor to find the first/last/none duplicate row for rows that compared equal. + * @brief The functor to find the first/last/all duplicate row for rows that compared equal. */ template struct distinct_reduce_fn : reduce_by_row_fn_base { From e73c07f8690e0b331fcda717436524a9fff99793 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 12:41:51 -0700 Subject: [PATCH 019/100] Move file --- .../reductions => include/cudf/detail}/hash_reduce_by_row.cuh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/{src/reductions => include/cudf/detail}/hash_reduce_by_row.cuh (100%) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh similarity index 100% rename from cpp/src/reductions/hash_reduce_by_row.cuh rename to cpp/include/cudf/detail/hash_reduce_by_row.cuh From 40e8730d806f7e965e59fb216a795771968055c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 12:45:16 -0700 Subject: [PATCH 020/100] Merge `distinct_reduce.*` into `distinct.cu` --- cpp/CMakeLists.txt | 1 - cpp/src/stream_compaction/distinct.cu | 101 ++++++++++++++-- cpp/src/stream_compaction/distinct_reduce.cu | 109 ------------------ cpp/src/stream_compaction/distinct_reduce.hpp | 87 -------------- 4 files changed, 90 insertions(+), 208 deletions(-) delete mode 100644 cpp/src/stream_compaction/distinct_reduce.cu delete mode 100644 cpp/src/stream_compaction/distinct_reduce.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a8e45b70572..e65ca2895c4 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -532,7 +532,6 @@ add_library( src/stream_compaction/apply_boolean_mask.cu src/stream_compaction/distinct.cu src/stream_compaction/distinct_count.cu - src/stream_compaction/distinct_reduce.cu src/stream_compaction/drop_nans.cu src/stream_compaction/drop_nulls.cu src/stream_compaction/stable_distinct.cu diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index b551df96765..8a7f6daa193 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "distinct_reduce.hpp" #include "stream_compaction_common.cuh" #include #include +#include #include #include #include @@ -39,6 +39,80 @@ namespace cudf { namespace detail { +namespace { +/** + * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. + * + * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. + * @return The initial reduction value. + */ +auto constexpr reduction_init_value(duplicate_keep_option keep) +{ + switch (keep) { + case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); + case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); + case duplicate_keep_option::KEEP_NONE: return size_type{0}; + default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); + } +} + +/** + * @brief The functor to find the first/last/all duplicate row for rows that compared equal. + */ +template +struct distinct_reduce_fn : reduce_by_row_fn_base { + duplicate_keep_option const keep; + + distinct_reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) + : reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, + keep{keep} + { + } + + __device__ void operator()(size_type const idx) const + { + auto const out_ptr = this->get_output_ptr(idx); + + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + atomicMin(out_ptr, idx); + } else if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. + atomicMax(out_ptr, idx); + } else { + // Count the number of rows in each group of rows that are compared equal. + atomicAdd(out_ptr, size_type{1}); + } + } +}; + +/** + * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given + * value of the `duplicate_keep_option` member variable. + */ +struct reduce_func_builder { + duplicate_keep_option const keep; + + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) + { + return distinct_reduce_fn{ + d_map, d_hasher, d_equal, keep, d_output}; + } +}; + +} // namespace + rmm::device_uvector get_distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -97,16 +171,21 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = distinct_reduce(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + // Depending on the `keep` parameter, the reduction operation for each row group is: + // - If `keep == KEEP_FIRST`: min of row indices in the group. + // - If `keep == KEEP_LAST`: max of row indices in the group. + // - If `keep == KEEP_NONE`: count of equivalent rows (group size). + auto const reduction_results = hash_reduce_by_row(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + reduce_func_builder{keep}, + reduction_init_value(keep), + stream, + mr); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu deleted file mode 100644 index 64d29ae2ff0..00000000000 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distinct_reduce.hpp" - -#include - -namespace cudf::detail { - -namespace { -/** - * @brief The functor to find the first/last/all duplicate row for rows that compared equal. - */ -template -struct distinct_reduce_fn : reduce_by_row_fn_base { - duplicate_keep_option const keep; - - distinct_reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - keep{keep} - { - } - - __device__ void operator()(size_type const idx) const - { - auto const out_ptr = this->get_output_ptr(idx); - - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - atomicMin(out_ptr, idx); - } else if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - atomicMax(out_ptr, idx); - } else { - // Count the number of rows in each group of rows that are compared equal. - atomicAdd(out_ptr, size_type{1}); - } - } -}; - -/** - * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given - * value of the `duplicate_keep_option` member variable. - */ -struct reduce_func_builder { - duplicate_keep_option const keep; - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) - { - return distinct_reduce_fn{ - d_map, d_hasher, d_equal, keep, d_output}; - } -}; - -} // namespace - -rmm::device_uvector distinct_reduce( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - return hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - reduce_func_builder{keep}, - reduction_init_value(keep), - stream, - mr); -} - -} // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_reduce.hpp b/cpp/src/stream_compaction/distinct_reduce.hpp deleted file mode 100644 index 236b6c860c3..00000000000 --- a/cpp/src/stream_compaction/distinct_reduce.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "stream_compaction_common.hpp" - -#include -#include -#include - -#include -#include - -namespace cudf::detail { - -/** - * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. - * - * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. - * @return The initial reduction value. - */ -auto constexpr reduction_init_value(duplicate_keep_option keep) -{ - switch (keep) { - case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); - case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); - case duplicate_keep_option::KEEP_NONE: return size_type{0}; - default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); - } -} - -/** - * @brief Perform a reduction on groups of rows that are compared equal. - * - * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. - * - * Depending on the `keep` parameter, the reduction operation for each row group is: - * - If `keep == KEEP_FIRST`: min of row indices in the group. - * - If `keep == KEEP_LAST`: max of row indices in the group. - * - If `keep == KEEP_NONE`: count of equivalent rows (group size). - * - * Note that this function is not needed when `keep == KEEP_NONE`. - * - * At the beginning of the operation, the entire output array is filled with a value given by - * the `reduction_init_value()` function. Then, the reduction result for each row group is written - * into the output array at the index of an unspecified row in the group. - * - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons - * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns - * @param keep The parameter to determine what type of reduction to perform - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param nans_equal Flag to specify whether NaN values in floating point column should be - * considered equal. - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results - */ -rmm::device_uvector distinct_reduce( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - -} // namespace cudf::detail From 95e4463262aa72b250df41a33367f9f66237a825 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 12:41:51 -0700 Subject: [PATCH 021/100] Move file --- .../reductions => include/cudf/detail}/hash_reduce_by_row.cuh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/{src/reductions => include/cudf/detail}/hash_reduce_by_row.cuh (100%) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh similarity index 100% rename from cpp/src/reductions/hash_reduce_by_row.cuh rename to cpp/include/cudf/detail/hash_reduce_by_row.cuh From 723ae4c720c3fc4a5f950c230657abeac60644c5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 12:45:16 -0700 Subject: [PATCH 022/100] Merge `distinct_reduce.*` into `distinct.cu` --- cpp/CMakeLists.txt | 1 - cpp/src/stream_compaction/distinct.cu | 101 ++++++++++++++-- cpp/src/stream_compaction/distinct_reduce.cu | 109 ------------------ cpp/src/stream_compaction/distinct_reduce.hpp | 87 -------------- 4 files changed, 90 insertions(+), 208 deletions(-) delete mode 100644 cpp/src/stream_compaction/distinct_reduce.cu delete mode 100644 cpp/src/stream_compaction/distinct_reduce.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 516865e5782..5703318592f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -530,7 +530,6 @@ add_library( src/stream_compaction/apply_boolean_mask.cu src/stream_compaction/distinct.cu src/stream_compaction/distinct_count.cu - src/stream_compaction/distinct_reduce.cu src/stream_compaction/drop_nans.cu src/stream_compaction/drop_nulls.cu src/stream_compaction/stable_distinct.cu diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index b551df96765..8a7f6daa193 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include "distinct_reduce.hpp" #include "stream_compaction_common.cuh" #include #include +#include #include #include #include @@ -39,6 +39,80 @@ namespace cudf { namespace detail { +namespace { +/** + * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. + * + * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. + * @return The initial reduction value. + */ +auto constexpr reduction_init_value(duplicate_keep_option keep) +{ + switch (keep) { + case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); + case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); + case duplicate_keep_option::KEEP_NONE: return size_type{0}; + default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); + } +} + +/** + * @brief The functor to find the first/last/all duplicate row for rows that compared equal. + */ +template +struct distinct_reduce_fn : reduce_by_row_fn_base { + duplicate_keep_option const keep; + + distinct_reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) + : reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, + keep{keep} + { + } + + __device__ void operator()(size_type const idx) const + { + auto const out_ptr = this->get_output_ptr(idx); + + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + atomicMin(out_ptr, idx); + } else if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. + atomicMax(out_ptr, idx); + } else { + // Count the number of rows in each group of rows that are compared equal. + atomicAdd(out_ptr, size_type{1}); + } + } +}; + +/** + * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given + * value of the `duplicate_keep_option` member variable. + */ +struct reduce_func_builder { + duplicate_keep_option const keep; + + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) + { + return distinct_reduce_fn{ + d_map, d_hasher, d_equal, keep, d_output}; + } +}; + +} // namespace + rmm::device_uvector get_distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -97,16 +171,21 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = distinct_reduce(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + // Depending on the `keep` parameter, the reduction operation for each row group is: + // - If `keep == KEEP_FIRST`: min of row indices in the group. + // - If `keep == KEEP_LAST`: max of row indices in the group. + // - If `keep == KEEP_NONE`: count of equivalent rows (group size). + auto const reduction_results = hash_reduce_by_row(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + reduce_func_builder{keep}, + reduction_init_value(keep), + stream, + mr); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu deleted file mode 100644 index 64d29ae2ff0..00000000000 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "distinct_reduce.hpp" - -#include - -namespace cudf::detail { - -namespace { -/** - * @brief The functor to find the first/last/all duplicate row for rows that compared equal. - */ -template -struct distinct_reduce_fn : reduce_by_row_fn_base { - duplicate_keep_option const keep; - - distinct_reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - keep{keep} - { - } - - __device__ void operator()(size_type const idx) const - { - auto const out_ptr = this->get_output_ptr(idx); - - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - atomicMin(out_ptr, idx); - } else if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - atomicMax(out_ptr, idx); - } else { - // Count the number of rows in each group of rows that are compared equal. - atomicAdd(out_ptr, size_type{1}); - } - } -}; - -/** - * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given - * value of the `duplicate_keep_option` member variable. - */ -struct reduce_func_builder { - duplicate_keep_option const keep; - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) - { - return distinct_reduce_fn{ - d_map, d_hasher, d_equal, keep, d_output}; - } -}; - -} // namespace - -rmm::device_uvector distinct_reduce( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, - "This function should not be called with KEEP_ANY"); - - return hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - reduce_func_builder{keep}, - reduction_init_value(keep), - stream, - mr); -} - -} // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_reduce.hpp b/cpp/src/stream_compaction/distinct_reduce.hpp deleted file mode 100644 index 236b6c860c3..00000000000 --- a/cpp/src/stream_compaction/distinct_reduce.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "stream_compaction_common.hpp" - -#include -#include -#include - -#include -#include - -namespace cudf::detail { - -/** - * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. - * - * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. - * @return The initial reduction value. - */ -auto constexpr reduction_init_value(duplicate_keep_option keep) -{ - switch (keep) { - case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); - case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); - case duplicate_keep_option::KEEP_NONE: return size_type{0}; - default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); - } -} - -/** - * @brief Perform a reduction on groups of rows that are compared equal. - * - * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. - * - * Depending on the `keep` parameter, the reduction operation for each row group is: - * - If `keep == KEEP_FIRST`: min of row indices in the group. - * - If `keep == KEEP_LAST`: max of row indices in the group. - * - If `keep == KEEP_NONE`: count of equivalent rows (group size). - * - * Note that this function is not needed when `keep == KEEP_NONE`. - * - * At the beginning of the operation, the entire output array is filled with a value given by - * the `reduction_init_value()` function. Then, the reduction result for each row group is written - * into the output array at the index of an unspecified row in the group. - * - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons - * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns - * @param keep The parameter to determine what type of reduction to perform - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param nans_equal Flag to specify whether NaN values in floating point column should be - * considered equal. - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results - */ -rmm::device_uvector distinct_reduce( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - duplicate_keep_option keep, - null_equality nulls_equal, - nan_equality nans_equal, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); - -} // namespace cudf::detail From 8fb7a9e7124a3bfcac780c108b6cc7e629c47219 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 13:21:57 -0700 Subject: [PATCH 023/100] Revert "Merge `distinct_reduce.*` into `distinct.cu`" This reverts commit 723ae4c720c3fc4a5f950c230657abeac60644c5. --- cpp/CMakeLists.txt | 1 + cpp/src/stream_compaction/distinct.cu | 101 ++-------------- cpp/src/stream_compaction/distinct_reduce.cu | 109 ++++++++++++++++++ cpp/src/stream_compaction/distinct_reduce.hpp | 87 ++++++++++++++ 4 files changed, 208 insertions(+), 90 deletions(-) create mode 100644 cpp/src/stream_compaction/distinct_reduce.cu create mode 100644 cpp/src/stream_compaction/distinct_reduce.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5703318592f..516865e5782 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -530,6 +530,7 @@ add_library( src/stream_compaction/apply_boolean_mask.cu src/stream_compaction/distinct.cu src/stream_compaction/distinct_count.cu + src/stream_compaction/distinct_reduce.cu src/stream_compaction/drop_nans.cu src/stream_compaction/drop_nulls.cu src/stream_compaction/stable_distinct.cu diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index 8a7f6daa193..b551df96765 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -14,11 +14,11 @@ * limitations under the License. */ +#include "distinct_reduce.hpp" #include "stream_compaction_common.cuh" #include #include -#include #include #include #include @@ -39,80 +39,6 @@ namespace cudf { namespace detail { -namespace { -/** - * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. - * - * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. - * @return The initial reduction value. - */ -auto constexpr reduction_init_value(duplicate_keep_option keep) -{ - switch (keep) { - case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); - case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); - case duplicate_keep_option::KEEP_NONE: return size_type{0}; - default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); - } -} - -/** - * @brief The functor to find the first/last/all duplicate row for rows that compared equal. - */ -template -struct distinct_reduce_fn : reduce_by_row_fn_base { - duplicate_keep_option const keep; - - distinct_reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) - : reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - keep{keep} - { - } - - __device__ void operator()(size_type const idx) const - { - auto const out_ptr = this->get_output_ptr(idx); - - if (keep == duplicate_keep_option::KEEP_FIRST) { - // Store the smallest index of all rows that are equal. - atomicMin(out_ptr, idx); - } else if (keep == duplicate_keep_option::KEEP_LAST) { - // Store the greatest index of all rows that are equal. - atomicMax(out_ptr, idx); - } else { - // Count the number of rows in each group of rows that are compared equal. - atomicAdd(out_ptr, size_type{1}); - } - } -}; - -/** - * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given - * value of the `duplicate_keep_option` member variable. - */ -struct reduce_func_builder { - duplicate_keep_option const keep; - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - size_type* const d_output) - { - return distinct_reduce_fn{ - d_map, d_hasher, d_equal, keep, d_output}; - } -}; - -} // namespace - rmm::device_uvector get_distinct_indices(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, @@ -171,21 +97,16 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - // Depending on the `keep` parameter, the reduction operation for each row group is: - // - If `keep == KEEP_FIRST`: min of row indices in the group. - // - If `keep == KEEP_LAST`: max of row indices in the group. - // - If `keep == KEEP_NONE`: count of equivalent rows (group size). - auto const reduction_results = hash_reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - nulls_equal, - nans_equal, - reduce_func_builder{keep}, - reduction_init_value(keep), - stream, - mr); + auto const reduction_results = distinct_reduce(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_reduce.cu new file mode 100644 index 00000000000..64d29ae2ff0 --- /dev/null +++ b/cpp/src/stream_compaction/distinct_reduce.cu @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "distinct_reduce.hpp" + +#include + +namespace cudf::detail { + +namespace { +/** + * @brief The functor to find the first/last/all duplicate row for rows that compared equal. + */ +template +struct distinct_reduce_fn : reduce_by_row_fn_base { + duplicate_keep_option const keep; + + distinct_reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) + : reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, + keep{keep} + { + } + + __device__ void operator()(size_type const idx) const + { + auto const out_ptr = this->get_output_ptr(idx); + + if (keep == duplicate_keep_option::KEEP_FIRST) { + // Store the smallest index of all rows that are equal. + atomicMin(out_ptr, idx); + } else if (keep == duplicate_keep_option::KEEP_LAST) { + // Store the greatest index of all rows that are equal. + atomicMax(out_ptr, idx); + } else { + // Count the number of rows in each group of rows that are compared equal. + atomicAdd(out_ptr, size_type{1}); + } + } +}; + +/** + * @brief The builder to construct an instance of `distinct_reduce_fn` functor base on the given + * value of the `duplicate_keep_option` member variable. + */ +struct reduce_func_builder { + duplicate_keep_option const keep; + + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + size_type* const d_output) + { + return distinct_reduce_fn{ + d_map, d_hasher, d_equal, keep, d_output}; + } +}; + +} // namespace + +rmm::device_uvector distinct_reduce( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(keep != duplicate_keep_option::KEEP_ANY, + "This function should not be called with KEEP_ANY"); + + return hash_reduce_by_row(map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + nulls_equal, + nans_equal, + reduce_func_builder{keep}, + reduction_init_value(keep), + stream, + mr); +} + +} // namespace cudf::detail diff --git a/cpp/src/stream_compaction/distinct_reduce.hpp b/cpp/src/stream_compaction/distinct_reduce.hpp new file mode 100644 index 00000000000..236b6c860c3 --- /dev/null +++ b/cpp/src/stream_compaction/distinct_reduce.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "stream_compaction_common.hpp" + +#include +#include +#include + +#include +#include + +namespace cudf::detail { + +/** + * @brief Return the reduction identity used to initialize results of `hash_reduce_by_row`. + * + * @param keep A value of `duplicate_keep_option` type, must not be `KEEP_ANY`. + * @return The initial reduction value. + */ +auto constexpr reduction_init_value(duplicate_keep_option keep) +{ + switch (keep) { + case duplicate_keep_option::KEEP_FIRST: return std::numeric_limits::max(); + case duplicate_keep_option::KEEP_LAST: return std::numeric_limits::min(); + case duplicate_keep_option::KEEP_NONE: return size_type{0}; + default: CUDF_UNREACHABLE("This function should not be called with KEEP_ANY"); + } +} + +/** + * @brief Perform a reduction on groups of rows that are compared equal. + * + * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared + * equal. A hash table is used to find groups of equal rows. + * + * Depending on the `keep` parameter, the reduction operation for each row group is: + * - If `keep == KEEP_FIRST`: min of row indices in the group. + * - If `keep == KEEP_LAST`: max of row indices in the group. + * - If `keep == KEEP_NONE`: count of equivalent rows (group size). + * + * Note that this function is not needed when `keep == KEEP_NONE`. + * + * At the beginning of the operation, the entire output array is filled with a value given by + * the `reduction_init_value()` function. Then, the reduction result for each row group is written + * into the output array at the index of an unspecified row in the group. + * + * @param map The auxiliary map to perform reduction + * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row + * comparisons + * @param num_rows The number of all input rows + * @param has_nulls Indicate whether the input rows has any nulls at any nested levels + * @param has_nested_columns Indicates whether the input table has any nested columns + * @param keep The parameter to determine what type of reduction to perform + * @param nulls_equal Flag to specify whether null elements should be considered as equal + * @param nans_equal Flag to specify whether NaN values in floating point column should be + * considered equal. + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vector + * @return A device_uvector containing the reduction results + */ +rmm::device_uvector distinct_reduce( + hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + duplicate_keep_option keep, + null_equality nulls_equal, + nan_equality nans_equal, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace cudf::detail From 65427c8211f5e4f63b2f3174f3fad284cf17f258 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 13:31:16 -0700 Subject: [PATCH 024/100] Rename function --- cpp/CMakeLists.txt | 2 +- cpp/src/stream_compaction/distinct.cu | 22 ++++++++--------- ...distinct_reduce.cu => distinct_helpers.cu} | 24 +++++++++---------- ...stinct_reduce.hpp => distinct_helpers.hpp} | 2 +- 4 files changed, 25 insertions(+), 25 deletions(-) rename cpp/src/stream_compaction/{distinct_reduce.cu => distinct_helpers.cu} (82%) rename cpp/src/stream_compaction/{distinct_reduce.hpp => distinct_helpers.hpp} (98%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 516865e5782..ca6444bd2f7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -530,7 +530,7 @@ add_library( src/stream_compaction/apply_boolean_mask.cu src/stream_compaction/distinct.cu src/stream_compaction/distinct_count.cu - src/stream_compaction/distinct_reduce.cu + src/stream_compaction/distinct_helpers.cu src/stream_compaction/drop_nans.cu src/stream_compaction/drop_nulls.cu src/stream_compaction/stable_distinct.cu diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index b551df96765..de2cd6da0dd 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include "distinct_reduce.hpp" +#include "distinct_helpers.hpp" #include "stream_compaction_common.cuh" #include @@ -97,16 +97,16 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = distinct_reduce(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + auto const reduction_results = indices_reduce_by_row(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_reduce.cu b/cpp/src/stream_compaction/distinct_helpers.cu similarity index 82% rename from cpp/src/stream_compaction/distinct_reduce.cu rename to cpp/src/stream_compaction/distinct_helpers.cu index 64d29ae2ff0..5d31e87943a 100644 --- a/cpp/src/stream_compaction/distinct_reduce.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include "distinct_reduce.hpp" +#include "distinct_helpers.hpp" -#include +#include namespace cudf::detail { @@ -25,14 +25,14 @@ namespace { * @brief The functor to find the first/last/all duplicate row for rows that compared equal. */ template -struct distinct_reduce_fn : reduce_by_row_fn_base { +struct reduce_fn : reduce_by_row_fn_base { duplicate_keep_option const keep; - distinct_reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - duplicate_keep_option const keep, - size_type* const d_output) + reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + duplicate_keep_option const keep, + size_type* const d_output) : reduce_by_row_fn_base{d_map, d_hasher, d_equal, @@ -59,7 +59,7 @@ struct distinct_reduce_fn : reduce_by_row_fn_base{ - d_map, d_hasher, d_equal, keep, d_output}; + return reduce_fn{d_map, d_hasher, d_equal, keep, d_output}; } }; } // namespace -rmm::device_uvector distinct_reduce( +// This function is split from `distinct.cu` to improve compile time. +rmm::device_uvector indices_reduce_by_row( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, diff --git a/cpp/src/stream_compaction/distinct_reduce.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp similarity index 98% rename from cpp/src/stream_compaction/distinct_reduce.hpp rename to cpp/src/stream_compaction/distinct_helpers.hpp index 236b6c860c3..9ae29783ca4 100644 --- a/cpp/src/stream_compaction/distinct_reduce.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -72,7 +72,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results */ -rmm::device_uvector distinct_reduce( +rmm::device_uvector indices_reduce_by_row( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, From 0c0c7ac8eb66d2e4192ef8499cbff1ef0b385014 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 15:56:14 -0700 Subject: [PATCH 025/100] Fix output type --- cpp/include/cudf/detail/hash_reduce_by_row.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 1cff009b17b..35654b90bc0 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -109,7 +109,7 @@ struct reduce_by_row_fn_base { * @return A device_uvector containing the reduction results */ template -rmm::device_uvector hash_reduce_by_row( +rmm::device_uvector hash_reduce_by_row( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, From 01cc1c2bf82924c0f239ea90a6e360602ee34a60 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 19:28:35 -0700 Subject: [PATCH 026/100] Move file --- .../cudf/detail => src/reductions}/hash_reduce_by_row.cuh | 0 cpp/src/stream_compaction/distinct_helpers.cu | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/{include/cudf/detail => src/reductions}/hash_reduce_by_row.cuh (100%) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh similarity index 100% rename from cpp/include/cudf/detail/hash_reduce_by_row.cuh rename to cpp/src/reductions/hash_reduce_by_row.cuh diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index 5d31e87943a..cb0dc4b1c50 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -16,7 +16,7 @@ #include "distinct_helpers.hpp" -#include +#include namespace cudf::detail { From f5a6a1a66841b82b1da0ff75a21d0faa98440847 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 19:30:15 -0700 Subject: [PATCH 027/100] Rename function --- cpp/src/stream_compaction/distinct.cu | 20 +++++++++---------- cpp/src/stream_compaction/distinct_helpers.cu | 2 +- .../stream_compaction/distinct_helpers.hpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index de2cd6da0dd..e031727c21a 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -97,16 +97,16 @@ rmm::device_uvector get_distinct_indices(table_view const& input, } // For other keep options, reduce by row on rows that compare equal. - auto const reduction_results = indices_reduce_by_row(map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - keep, - nulls_equal, - nans_equal, - stream, - rmm::mr::get_current_device_resource()); + auto const reduction_results = reduce_by_row(map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + keep, + nulls_equal, + nans_equal, + stream, + rmm::mr::get_current_device_resource()); // Extract the desired output indices from reduction results. auto const map_end = [&] { diff --git a/cpp/src/stream_compaction/distinct_helpers.cu b/cpp/src/stream_compaction/distinct_helpers.cu index cb0dc4b1c50..a9df0bc98b8 100644 --- a/cpp/src/stream_compaction/distinct_helpers.cu +++ b/cpp/src/stream_compaction/distinct_helpers.cu @@ -78,7 +78,7 @@ struct reduce_func_builder { } // namespace // This function is split from `distinct.cu` to improve compile time. -rmm::device_uvector indices_reduce_by_row( +rmm::device_uvector reduce_by_row( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index 9ae29783ca4..b667d0b04f0 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -72,7 +72,7 @@ auto constexpr reduction_init_value(duplicate_keep_option keep) * @param mr Device memory resource used to allocate the returned vector * @return A device_uvector containing the reduction results */ -rmm::device_uvector indices_reduce_by_row( +rmm::device_uvector reduce_by_row( hash_map_type const& map, std::shared_ptr const preprocessed_input, size_type num_rows, From 924a2d68bc3c5a180bafe4a461c05821106bc0f4 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 21:10:14 -0700 Subject: [PATCH 028/100] Implement histogram reduction --- cpp/src/reductions/histogram.cu | 180 ++++++++++++++++++++++++++++++-- 1 file changed, 171 insertions(+), 9 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 053ad62180b..262d1c94e89 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,28 +14,190 @@ * limitations under the License. */ -#include +#include +#include -#include +#include +#include +#include -#include -#include +#include -#include +#include namespace cudf::reduction::detail { -std::unique_ptr histogram(column_view const& input, +namespace { + +/** + * @brief The functor to compute the occurences of each unique rows in the input table. + */ +template +struct reduce_fn : cudf::detail::reduce_by_row_fn_base { + reduce_fn(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + OutputType* const d_output) + : cudf::detail::reduce_by_row_fn_base{ + d_map, d_hasher, d_equal, d_output} + { + } + + // Count the number of rows in each group of rows that are compared equal. + __device__ void operator()(size_type const idx) const + { + cuda::atomic_ref count(*this->get_output_ptr(idx)); + count.fetch_add(OutputType{1}, cuda::std::memory_order_relaxed); + } +}; + +/** + * @brief The builder to construct an instance of `reduce_fn` functor. + */ +struct reduce_func_builder { + template + auto build(MapView const& d_map, + KeyHasher const& d_hasher, + KeyEqual const& d_equal, + OutputType* const d_output) + { + return reduce_fn{d_map, d_hasher, d_equal, d_output}; + } +}; + +template +struct is_none_zero { + T const* data; + __device__ bool operator()(size_type const idx) const { return data[idx] != T{0}; } +}; + +struct histogram_dispatcher { + template + static bool constexpr is_supported() + { + // Currently only int64_t is requested by Spark-Rapids. + // More data type can be supported by enabling it below. + return std::is_same_v; + } + + template + std::enable_if_t(), void> operator()(Args&&...) + { + CUDF_FAIL("Unsupported output type in histogram aggregation."); + } + + template ())> + void operator()( + cudf::detail::hash_map_type const& map, + std::shared_ptr const preprocessed_input, + size_type num_rows, + cudf::nullate::DYNAMIC has_nulls, + bool has_nested_columns, + mutable_column_view const& output, + rmm::cuda_stream_view stream) const + { + auto const reduction_results = + cudf::detail::hash_reduce_by_row(map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + null_equality::EQUAL, + nan_equality::ALL_EQUAL, + reduce_func_builder{}, + OutputType{0}, + stream, + rmm::mr::get_current_device_resource()); + + // Reduction results are either group sizes of equal rows, or `0`. + // Thus, we only needs to extract the non-zero group sizes. + thrust::copy_if(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + output.begin(), + is_none_zero{reduction_results.begin()}); + } +}; + +} // namespace + +std::unique_ptr histogram(table_view const& input, data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::is_integral(output_dtype), - "The output type of histogram aggregation must be an integral type."); + CUDF_EXPECTS(cudf::is_integral(output_dtype) && + (cudf::size_of(output_dtype) == 4 || cudf::size_of(output_dtype) == 8), + "The output type of histogram aggregation must be an 32/64bit integral type."); + auto map = cudf::detail::hash_map_type{ + compute_hash_table_size(input.num_rows()), + cuco::empty_key{cudf::detail::COMPACTION_EMPTY_KEY_SENTINEL}, + cuco::empty_value{cudf::detail::COMPACTION_EMPTY_VALUE_SENTINEL}, + cudf::detail::hash_table_allocator_type{default_allocator{}, stream}, + stream.value()}; + auto const preprocessed_input = + cudf::experimental::row::hash::preprocessed_table::create(input, stream); + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input); - return nullptr; + auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); + auto const key_hasher = + cudf::detail::experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + + auto const pair_iter = cudf::detail::make_counting_transform_iterator( + size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); + + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + auto const value_comp = nan_equal_comparator{}; + if (has_nested_columns) { + auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } else { + auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + } + + // Gather the indices of distinct rows. + auto distinct_indices = cudf::make_numeric_column(data_type{type_to_id()}, + static_cast(map.get_size()), + mask_state::UNALLOCATED, + stream, + mr); + map.retrieve_all(distinct_indices->mutable_view().begin(), + thrust::make_discard_iterator(), + stream.value()); + + // Count the number of occurences of each unique row. + auto unique_counts = make_numeric_column( + output_dtype, static_cast(map.get_size()), mask_state::UNALLOCATED, stream, mr); + type_dispatcher(output_dtype, + histogram_dispatcher{}, + map, + std::move(preprocessed_input), + input.num_rows(), + has_nulls, + has_nested_columns, + unique_counts->mutable_view(), + stream); + + std::vector> output_children; + output_children.emplace_back(std::move(distinct_indices)); + output_children.emplace_back(std::move(unique_counts)); + + return make_structs_column( + static_cast(map.get_size()), std::move(output_children), 0, {}, stream, mr); +} + +std::unique_ptr histogram(column_view const& input, + data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return histogram(table_view{{input}}, output_dtype, stream, mr); } std::unique_ptr merge_histogram(column_view const& input, From a1b516e119c8abacc12321c144ca682c05df5b74 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 12 Sep 2023 22:22:47 -0700 Subject: [PATCH 029/100] Support partial count --- cpp/src/reductions/histogram.cu | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 262d1c94e89..213d5ae0cd1 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -34,12 +34,18 @@ namespace { */ template struct reduce_fn : cudf::detail::reduce_by_row_fn_base { + OutputType const* d_partial_output; + reduce_fn(MapView const& d_map, KeyHasher const& d_hasher, KeyEqual const& d_equal, - OutputType* const d_output) - : cudf::detail::reduce_by_row_fn_base{ - d_map, d_hasher, d_equal, d_output} + OutputType* const d_output, + OutputType const* const d_partial_output = nullptr) + : cudf::detail::reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, + d_partial_output{d_partial_output} { } @@ -47,7 +53,11 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base count(*this->get_output_ptr(idx)); - count.fetch_add(OutputType{1}, cuda::std::memory_order_relaxed); + if (d_partial_output) { + count.fetch_add(d_partial_output[idx], cuda::std::memory_order_relaxed); + } else { + count.fetch_add(OutputType{1}, cuda::std::memory_order_relaxed); + } } }; From e196ab4d5d6466594c4262037b6e6a3b42f442dc Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 11:09:13 -0700 Subject: [PATCH 030/100] Return list scalar of structs --- cpp/src/reductions/histogram.cu | 67 +++++++++++++++++---------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 213d5ae0cd1..6a6522413b8 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -18,7 +18,9 @@ #include #include +#include #include +#include #include #include @@ -131,7 +133,7 @@ struct histogram_dispatcher { } // namespace -std::unique_ptr histogram(table_view const& input, +std::unique_ptr histogram(column_view const& input, data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -141,16 +143,17 @@ std::unique_ptr histogram(table_view const& input, "The output type of histogram aggregation must be an 32/64bit integral type."); auto map = cudf::detail::hash_map_type{ - compute_hash_table_size(input.num_rows()), + compute_hash_table_size(input.size()), cuco::empty_key{cudf::detail::COMPACTION_EMPTY_KEY_SENTINEL}, cuco::empty_value{cudf::detail::COMPACTION_EMPTY_VALUE_SENTINEL}, cudf::detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; + auto const input_tview = table_view{{input}}; auto const preprocessed_input = - cudf::experimental::row::hash::preprocessed_table::create(input, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; - auto const has_nested_columns = cudf::detail::has_nested_columns(input); + cudf::experimental::row::hash::preprocessed_table::create(input_tview, stream); + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input_tview)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input_tview); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = @@ -165,21 +168,25 @@ std::unique_ptr histogram(table_view const& input, auto const value_comp = nan_equal_comparator{}; if (has_nested_columns) { auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + map.insert(pair_iter, pair_iter + input.size(), key_hasher, key_equal, stream.value()); } else { auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); + map.insert(pair_iter, pair_iter + input.size(), key_hasher, key_equal, stream.value()); } - // Gather the indices of distinct rows. - auto distinct_indices = cudf::make_numeric_column(data_type{type_to_id()}, - static_cast(map.get_size()), - mask_state::UNALLOCATED, - stream, - mr); - map.retrieve_all(distinct_indices->mutable_view().begin(), - thrust::make_discard_iterator(), - stream.value()); + // Gather the indices of distinct rows and distinct rows. + auto distinct_indices = rmm::device_uvector( + static_cast(map.get_size()), stream, rmm::mr::get_current_device_resource()); + map.retrieve_all(distinct_indices.begin(), thrust::make_discard_iterator(), stream.value()); + auto distinct_rows = + std::move(cudf::detail::gather(input_tview, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release() + .front()); // Count the number of occurences of each unique row. auto unique_counts = make_numeric_column( @@ -188,37 +195,31 @@ std::unique_ptr histogram(table_view const& input, histogram_dispatcher{}, map, std::move(preprocessed_input), - input.num_rows(), + input.size(), has_nulls, has_nested_columns, unique_counts->mutable_view(), stream); - std::vector> output_children; - output_children.emplace_back(std::move(distinct_indices)); - output_children.emplace_back(std::move(unique_counts)); + std::vector> struct_children; + struct_children.emplace_back(std::move(distinct_rows)); + struct_children.emplace_back(std::move(unique_counts)); + auto output_structs = make_structs_column( + static_cast(map.get_size()), std::move(struct_children), 0, {}, stream, mr); - return make_structs_column( - static_cast(map.get_size()), std::move(output_children), 0, {}, stream, mr); + return std::make_unique( + std::move(*output_structs.release()), true, stream, mr); } -std::unique_ptr histogram(column_view const& input, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - return histogram(table_view{{input}}, output_dtype, stream, mr); -} - -std::unique_ptr merge_histogram(column_view const& input, +std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS( input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); - CUDF_EXPECTS(cudf::is_integral(input.child(1).type()), - "The second child of the input column must be an integer type."); + CUDF_EXPECTS(input.child(1).type().id() == type_id::INT64, + "The second child of the input column must be INT64 type."); return nullptr; } From 09f68afb295455b76c015e45c3065bcaa1d38c63 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 11:29:02 -0700 Subject: [PATCH 031/100] Add factory functions for histogram and merge histogram --- cpp/include/cudf/aggregation.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 359c53dff60..b4491b68da2 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -290,6 +290,11 @@ std::unique_ptr make_any_aggregation(); template std::unique_ptr make_all_aggregation(); +/// Factory to create a HISTOGRAM aggregation +/// @return A HISTOGRAM aggregation object +template +std::unique_ptr make_histogram_aggregation(); + /// Factory to create a SUM_OF_SQUARES aggregation /// @return A SUM_OF_SQUARES aggregation object template @@ -612,6 +617,13 @@ std::unique_ptr make_merge_sets_aggregation( template std::unique_ptr make_merge_m2_aggregation(); +/** + * @brief make_merge_m2_aggregation + * @return + */ +template +std::unique_ptr make_merge_histogram_aggregation(); + /** * @brief Factory to create a COVARIANCE aggregation * From f107d9876e8f78c9bffe8e7f7c1fa42ef6181a3d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 11:41:45 -0700 Subject: [PATCH 032/100] Fix aggregation dispatcher --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 345977384f3..930ec992384 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1461,6 +1461,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::COUNT_ALL: return f.template operator()(std::forward(args)...); + case aggregation::HISTOGRAM: + return f.template operator()(std::forward(args)...); case aggregation::ANY: return f.template operator()(std::forward(args)...); case aggregation::ALL: @@ -1504,6 +1506,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind return f.template operator()(std::forward(args)...); case aggregation::MERGE_M2: return f.template operator()(std::forward(args)...); + case aggregation::MERGE_HISTOGRAM: + return f.template operator()(std::forward(args)...); case aggregation::COVARIANCE: return f.template operator()(std::forward(args)...); case aggregation::CORRELATION: From cc185d8b4b9fe555b72aa8be4537d7c99e7f6740 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 12:59:04 -0700 Subject: [PATCH 033/100] Fix bug --- cpp/src/reductions/histogram.cu | 68 ++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 6a6522413b8..f45f1199a39 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include @@ -77,10 +79,12 @@ struct reduce_func_builder { } }; -template struct is_none_zero { - T const* data; - __device__ bool operator()(size_type const idx) const { return data[idx] != T{0}; } + template + __device__ bool operator()(Pair const inp_pair) const + { + return thrust::get<1>(inp_pair) != 0; + } }; struct histogram_dispatcher { @@ -105,7 +109,8 @@ struct histogram_dispatcher { size_type num_rows, cudf::nullate::DYNAMIC has_nulls, bool has_nested_columns, - mutable_column_view const& output, + size_type* output_indices, + mutable_column_view const& output_count, rmm::cuda_stream_view stream) const { auto const reduction_results = @@ -121,13 +126,28 @@ struct histogram_dispatcher { stream, rmm::mr::get_current_device_resource()); + column_view cv = column_view(data_type{type_id::INT64}, + (int)reduction_results.size(), + reduction_results.data(), + nullptr, + 0); + printf("reduction result, num rows = %d\n", num_rows); + cudf::test::print(cv); + + auto const input_it = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); + + auto const output_it = thrust::make_zip_iterator( + thrust::make_tuple(output_indices, output_count.begin())); + + thrust::copy_if(rmm::exec_policy(stream), + input_it, + input_it + num_rows, + output_it, + is_none_zero{}); + // Reduction results are either group sizes of equal rows, or `0`. // Thus, we only needs to extract the non-zero group sizes. - thrust::copy_if(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - output.begin(), - is_none_zero{reduction_results.begin()}); } }; @@ -177,19 +197,10 @@ std::unique_ptr histogram(column_view const& input, // Gather the indices of distinct rows and distinct rows. auto distinct_indices = rmm::device_uvector( static_cast(map.get_size()), stream, rmm::mr::get_current_device_resource()); - map.retrieve_all(distinct_indices.begin(), thrust::make_discard_iterator(), stream.value()); - auto distinct_rows = - std::move(cudf::detail::gather(input_tview, - distinct_indices, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release() - .front()); + // map.retrieve_all(distinct_indices.begin(), thrust::make_discard_iterator(), stream.value()); // Count the number of occurences of each unique row. - auto unique_counts = make_numeric_column( + auto distinct_counts = make_numeric_column( output_dtype, static_cast(map.get_size()), mask_state::UNALLOCATED, stream, mr); type_dispatcher(output_dtype, histogram_dispatcher{}, @@ -198,12 +209,25 @@ std::unique_ptr histogram(column_view const& input, input.size(), has_nulls, has_nested_columns, - unique_counts->mutable_view(), + distinct_indices.begin(), + distinct_counts->mutable_view(), stream); + auto distinct_rows = + std::move(cudf::detail::gather(input_tview, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release() + .front()); + printf("reduction result 2\n"); + cudf::test::print(distinct_counts->view()); + std::vector> struct_children; struct_children.emplace_back(std::move(distinct_rows)); - struct_children.emplace_back(std::move(unique_counts)); + struct_children.emplace_back(std::move(distinct_counts)); auto output_structs = make_structs_column( static_cast(map.get_size()), std::move(struct_children), 0, {}, stream, mr); From 547be01d3939b3474c52a05acbf54c70250174c6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 12:59:21 -0700 Subject: [PATCH 034/100] Working test --- cpp/tests/reductions/reduction_tests.cpp | 45 ++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 2561f3f9886..c8ff6645b16 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,7 @@ struct ReductionTest : public cudf::test::BaseFixture { } }; +#if 0 template struct MinMaxReductionTest : public ReductionTest {}; @@ -292,8 +294,10 @@ TYPED_TEST(SumReductionTest, Sum) .second); } -TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); +#endif +TYPED_TEST_SUITE(ReductionTest, cudf::test::FloatingPointTypes); +#if 0 TYPED_TEST(ReductionTest, Product) { using T = TypeParam; @@ -379,6 +383,43 @@ TYPED_TEST(ReductionTest, SumOfSquare) expected_null_value); } +#endif + +TYPED_TEST(ReductionTest, Histogram) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using int64_data = cudf::test::fixed_width_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const agg = cudf::make_histogram_aggregation(); + + // Test without nulls. + { + auto const input = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const expected = [] { + auto child1 = col_data{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_data{2, 1, 1, 2, 4, 1}; + return structs_col{{child1, child2}}; + }(); + // auto const input = col_data{1, 2, 3, 1, 2}; + // auto const expected = [] { + // auto child1 = col_data{1, 2, 3}; + // auto child2 = int64_data{2, 2, 1}; + // return structs_col{{child1, child2}}; + // }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } +} + +#if 0 template struct ReductionAnyAllTest : public ReductionTest {}; using AnyAllTypes = cudf::test::Types; @@ -2936,5 +2977,5 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) *cudf::make_max_aggregation()); } } - +#endif CUDF_TEST_PROGRAM_MAIN() From 4d93b1e0bf14871562d7838235e50d0c3228f9b2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 13:18:06 -0700 Subject: [PATCH 035/100] Implement merge histogram --- cpp/src/reductions/histogram.cu | 87 ++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 34 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index f45f1199a39..e0cd1586756 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -29,6 +29,8 @@ #include +#include + namespace cudf::reduction::detail { namespace { @@ -44,7 +46,7 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base{d_map, d_hasher, d_equal, @@ -68,19 +70,27 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base struct reduce_func_builder { - template + OutputType const* const d_partial_output; + + reduce_func_builder(OutputType const* const d_partial_output) : d_partial_output{d_partial_output} + { + } + + template auto build(MapView const& d_map, KeyHasher const& d_hasher, KeyEqual const& d_equal, OutputType* const d_output) { - return reduce_fn{d_map, d_hasher, d_equal, d_output}; + return reduce_fn{ + d_map, d_hasher, d_equal, d_output, d_partial_output}; } }; struct is_none_zero { - template + template __device__ bool operator()(Pair const inp_pair) const { return thrust::get<1>(inp_pair) != 0; @@ -110,21 +120,23 @@ struct histogram_dispatcher { cudf::nullate::DYNAMIC has_nulls, bool has_nested_columns, size_type* output_indices, - mutable_column_view const& output_count, + mutable_column_view const& output_counts, + std::optional const& partial_counts, rmm::cuda_stream_view stream) const { - auto const reduction_results = - cudf::detail::hash_reduce_by_row(map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - null_equality::EQUAL, - nan_equality::ALL_EQUAL, - reduce_func_builder{}, - OutputType{0}, - stream, - rmm::mr::get_current_device_resource()); + auto const reduction_results = cudf::detail::hash_reduce_by_row( + map, + preprocessed_input, + num_rows, + has_nulls, + has_nested_columns, + null_equality::EQUAL, + nan_equality::ALL_EQUAL, + reduce_func_builder{partial_counts ? partial_counts.value().begin() + : nullptr}, + OutputType{0}, + stream, + rmm::mr::get_current_device_resource()); column_view cv = column_view(data_type{type_id::INT64}, (int)reduction_results.size(), @@ -138,13 +150,10 @@ struct histogram_dispatcher { thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); auto const output_it = thrust::make_zip_iterator( - thrust::make_tuple(output_indices, output_count.begin())); + thrust::make_tuple(output_indices, output_counts.begin())); - thrust::copy_if(rmm::exec_policy(stream), - input_it, - input_it + num_rows, - output_it, - is_none_zero{}); + thrust::copy_if( + rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, is_none_zero{}); // Reduction results are either group sizes of equal rows, or `0`. // Thus, we only needs to extract the non-zero group sizes. @@ -153,7 +162,8 @@ struct histogram_dispatcher { } // namespace -std::unique_ptr histogram(column_view const& input, +std::unique_ptr histogram(table_view const& input, + std::optional const& partial_distinct_counts, data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -163,17 +173,16 @@ std::unique_ptr histogram(column_view const& input, "The output type of histogram aggregation must be an 32/64bit integral type."); auto map = cudf::detail::hash_map_type{ - compute_hash_table_size(input.size()), + compute_hash_table_size(input.num_rows()), cuco::empty_key{cudf::detail::COMPACTION_EMPTY_KEY_SENTINEL}, cuco::empty_value{cudf::detail::COMPACTION_EMPTY_VALUE_SENTINEL}, cudf::detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; - auto const input_tview = table_view{{input}}; auto const preprocessed_input = - cudf::experimental::row::hash::preprocessed_table::create(input_tview, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input_tview)}; - auto const has_nested_columns = cudf::detail::has_nested_columns(input_tview); + cudf::experimental::row::hash::preprocessed_table::create(input, stream); + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; + auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = @@ -188,10 +197,10 @@ std::unique_ptr histogram(column_view const& input, auto const value_comp = nan_equal_comparator{}; if (has_nested_columns) { auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.size(), key_hasher, key_equal, stream.value()); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); } else { auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.size(), key_hasher, key_equal, stream.value()); + map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); } // Gather the indices of distinct rows and distinct rows. @@ -206,15 +215,16 @@ std::unique_ptr histogram(column_view const& input, histogram_dispatcher{}, map, std::move(preprocessed_input), - input.size(), + input.num_rows(), has_nulls, has_nested_columns, distinct_indices.begin(), distinct_counts->mutable_view(), + partial_distinct_counts, stream); auto distinct_rows = - std::move(cudf::detail::gather(input_tview, + std::move(cudf::detail::gather(input, distinct_indices, out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, @@ -235,6 +245,14 @@ std::unique_ptr histogram(column_view const& input, std::move(*output_structs.release()), true, stream, mr); } +std::unique_ptr histogram(column_view const& input, + data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return histogram(table_view{{input}}, std::nullopt, output_dtype, stream, mr); +} + std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -245,7 +263,8 @@ std::unique_ptr merge_histogram(column_view const& input, CUDF_EXPECTS(input.child(1).type().id() == type_id::INT64, "The second child of the input column must be INT64 type."); - return nullptr; + return histogram( + table_view{{input.child(0)}}, input.child(1), data_type{type_id::INT64}, stream, mr); } } // namespace cudf::reduction::detail From 6d8be79456adf660df6b87a1ae6bc287a5c6ddb2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 13:18:12 -0700 Subject: [PATCH 036/100] Add test for merge histogram --- cpp/tests/reductions/reduction_tests.cpp | 39 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index c8ff6645b16..47f276b8d07 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -401,12 +401,39 @@ TYPED_TEST(ReductionTest, Histogram) auto child2 = int64_data{2, 1, 1, 2, 4, 1}; return structs_col{{child1, child2}}; }(); - // auto const input = col_data{1, 2, 3, 1, 2}; - // auto const expected = [] { - // auto child1 = col_data{1, 2, 3}; - // auto child2 = int64_data{2, 2, 1}; - // return structs_col{{child1, child2}}; - // }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } +} + +TYPED_TEST(ReductionTest, MergeHistogram) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using int64_data = cudf::test::fixed_width_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const agg = cudf::make_merge_histogram_aggregation(); + + // Test without nulls. + { + auto const input = [] { + auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_data{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = col_data{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_data{5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); auto const result_col = dynamic_cast(result.get())->view(); cudf::test::print(result_col); From 2d085397ffea6bd14681038dc00fdbe21875cc41 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 13:20:45 -0700 Subject: [PATCH 037/100] Cleanup --- cpp/src/reductions/histogram.cu | 10 +++++----- cpp/tests/reductions/reduction_tests.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index e0cd1586756..4550c65e033 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +//#include #include #include @@ -143,8 +143,8 @@ struct histogram_dispatcher { reduction_results.data(), nullptr, 0); - printf("reduction result, num rows = %d\n", num_rows); - cudf::test::print(cv); +// printf("reduction result, num rows = %d\n", num_rows); +// cudf::test::print(cv); auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); @@ -232,8 +232,8 @@ std::unique_ptr histogram(table_view const& input, mr) ->release() .front()); - printf("reduction result 2\n"); - cudf::test::print(distinct_counts->view()); +// printf("reduction result 2\n"); +// cudf::test::print(distinct_counts->view()); std::vector> struct_children; struct_children.emplace_back(std::move(distinct_rows)); diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 47f276b8d07..02bbafd1bef 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -403,11 +403,11 @@ TYPED_TEST(ReductionTest, Histogram) }(); auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); auto const result_col = dynamic_cast(result.get())->view(); - cudf::test::print(result_col); + // cudf::test::print(result_col); auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - cudf::test::print(sorted_result->get_column(0).view()); + // cudf::test::print(sorted_result->get_column(0).view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } @@ -436,11 +436,11 @@ TYPED_TEST(ReductionTest, MergeHistogram) }(); auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); auto const result_col = dynamic_cast(result.get())->view(); - cudf::test::print(result_col); + // cudf::test::print(result_col); auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - cudf::test::print(sorted_result->get_column(0).view()); + // cudf::test::print(sorted_result->get_column(0).view()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } From 7999c7eed9bbe9ab319c18dc5f94e06d25111e7c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 14:28:51 -0700 Subject: [PATCH 038/100] Cleanup --- cpp/src/reductions/histogram.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 4550c65e033..a4aba384c46 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -//#include +// #include #include #include @@ -143,8 +143,8 @@ struct histogram_dispatcher { reduction_results.data(), nullptr, 0); -// printf("reduction result, num rows = %d\n", num_rows); -// cudf::test::print(cv); + // printf("reduction result, num rows = %d\n", num_rows); + // cudf::test::print(cv); auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); @@ -232,8 +232,8 @@ std::unique_ptr histogram(table_view const& input, mr) ->release() .front()); -// printf("reduction result 2\n"); -// cudf::test::print(distinct_counts->view()); + // printf("reduction result 2\n"); + // cudf::test::print(distinct_counts->view()); std::vector> struct_children; struct_children.emplace_back(std::move(distinct_rows)); @@ -260,8 +260,9 @@ std::unique_ptr merge_histogram(column_view const& input, CUDF_EXPECTS( input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); - CUDF_EXPECTS(input.child(1).type().id() == type_id::INT64, - "The second child of the input column must be INT64 type."); + CUDF_EXPECTS(input.child(1).type().id() == type_id::INT64 && !input.child(1).has_nulls(), + "The second child of the input column must be INT64 type and has no nulls."); + CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); return histogram( table_view{{input.child(0)}}, input.child(1), data_type{type_id::INT64}, stream, mr); From 2d47048a1d1cfaa03d1540365e1106d8ef587f5a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 14:32:22 -0700 Subject: [PATCH 039/100] Add tests with nulls --- cpp/tests/reductions/reduction_tests.cpp | 53 +++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 02bbafd1bef..c1ee5ec4e76 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -387,7 +387,7 @@ TYPED_TEST(ReductionTest, SumOfSquare) TYPED_TEST(ReductionTest, Histogram) { - using col_data = cudf::test::fixed_width_column_wrapper; + using col_data = cudf::test::fixed_width_column_wrapper; using int64_data = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; @@ -411,6 +411,29 @@ TYPED_TEST(ReductionTest, Histogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + + // Test with nulls. + { + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const input = col_data{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, + nulls_at({0, 6, 9, 12})}; + auto const expected = [] { + auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_data{4, 2, 1, 1, 2, 4, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } } TYPED_TEST(ReductionTest, MergeHistogram) @@ -444,6 +467,34 @@ TYPED_TEST(ReductionTest, MergeHistogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + + // Test with nulls. + { + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const input = [] { + auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64_data{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_data{67, 5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } } #if 0 From 824dcad85be0a4588ea151213a76b2463e254254 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 14:52:59 -0700 Subject: [PATCH 040/100] Add sliced input tests --- cpp/tests/reductions/reduction_tests.cpp | 98 ++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index c1ee5ec4e76..e61db56ad52 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -412,6 +412,26 @@ TYPED_TEST(ReductionTest, Histogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + // Test without nulls, sliced input. + { + auto const input_original = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const input = cudf::slice(input_original, {0, 7})[0]; + auto const expected = [] { + auto child1 = col_data{-3, 0, 1, 2, 5}; + auto child2 = int64_data{1, 1, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } + // Test with nulls. { using namespace cudf::test::iterators; @@ -434,6 +454,30 @@ TYPED_TEST(ReductionTest, Histogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + + // Test with nulls, sliced input. + { + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const input_original = col_data{ + {null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; + auto const input = cudf::slice(input_original, {0, 9})[0]; + auto const expected = [] { + auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_data{2, 1, 1, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } } TYPED_TEST(ReductionTest, MergeHistogram) @@ -468,6 +512,31 @@ TYPED_TEST(ReductionTest, MergeHistogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + // Test without nulls, sliced input. + { + auto const input_original = [] { + auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_data{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 7})[0]; + + auto const expected = [] { + auto child1 = col_data{-3, 0, 1, 2, 5}; + auto child2 = int64_data{2, 4, 1, 5, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } + // Test with nulls. { using namespace cudf::test::iterators; @@ -495,6 +564,35 @@ TYPED_TEST(ReductionTest, MergeHistogram) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); } + + // Test with nulls, sliced input. + { + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const input_original = [] { + auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64_data{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 9})[0]; + + auto const expected = [] { + auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_data{33, 2, 4, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result.get())->view(); + // cudf::test::print(result_col); + + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); + // cudf::test::print(sorted_result->get_column(0).view()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + } } #if 0 From 3fb43f488be2566b8f282565109b779712bb2e60 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 14:53:10 -0700 Subject: [PATCH 041/100] Fix sliced input --- cpp/src/reductions/histogram.cu | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index a4aba384c46..1665ef2bfea 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -264,8 +264,12 @@ std::unique_ptr merge_histogram(column_view const& input, "The second child of the input column must be INT64 type and has no nulls."); CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); - return histogram( - table_view{{input.child(0)}}, input.child(1), data_type{type_id::INT64}, stream, mr); + auto const structs_cv = structs_column_view{input}; + return histogram(table_view{{structs_cv.get_sliced_child(0, stream)}}, + structs_cv.get_sliced_child(1, stream), + data_type{type_id::INT64}, + stream, + mr); } } // namespace cudf::reduction::detail From ee229a00125c9a4c2edff32c73ca8d952c75b1e7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 15:24:05 -0700 Subject: [PATCH 042/100] Add binding for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations --- .../main/java/ai/rapids/cudf/Aggregation.java | 24 ++++++++++++++++++- .../ai/rapids/cudf/GroupByAggregation.java | 8 +++++++ java/src/main/native/src/AggregationJni.cpp | 5 ++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index d10329ca0f2..029017ae113 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -68,7 +68,9 @@ enum Kind { DENSE_RANK(29), PERCENT_RANK(30), TDIGEST(31), // This can take a delta argument for accuracy level - MERGE_TDIGEST(32); // This can take a delta argument for accuracy level + MERGE_TDIGEST(32), // This can take a delta argument for accuracy level + HISTOGRAM(33), + MERGE_HISTOGRAM(34); final int nativeId; @@ -918,6 +920,26 @@ static TDigestAggregation mergeTDigest(int delta) { return new TDigestAggregation(Kind.MERGE_TDIGEST, delta); } + static final class HistogramAggregation extends NoParamAggregation { + private HistogramAggregation() { + super(Kind.HISTOGRAM); + } + } + + static final class MergeHistogramAggregation extends NoParamAggregation { + private MergeHistogramAggregation() { + super(Kind.MERGE_HISTOGRAM); + } + } + + static HistogramAggregation histogram() { + return new HistogramAggregation(); + } + + static MergeHistogramAggregation mergeHistogram() { + return new MergeHistogramAggregation(); + } + /** * Create one of the aggregations that only needs a kind, no other parameters. This does not * work for all types and for code safety reasons each kind is added separately. diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 500d18f7eae..25bb716bd5a 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -315,4 +315,12 @@ public static GroupByAggregation createTDigest(int delta) { public static GroupByAggregation mergeTDigest(int delta) { return new GroupByAggregation(Aggregation.mergeTDigest(delta)); } + + public static GroupByAggregation histogram() { + return new GroupByAggregation(Aggregation.histogram()); + } + + public static GroupByAggregation mergeHistogram() { + return new GroupByAggregation(Aggregation.mergeHistogram()); + } } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index 6ac73282615..8984c27530d 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -90,6 +90,11 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv case 30: // ANSI SQL PERCENT_RANK return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, {}, cudf::rank_percentage::ONE_NORMALIZED); + case 33: // HISTOGRAM + return cudf::make_histogram_aggregation(); + case 34: // MERGE_HISTOGRAM + return cudf::make_merge_histogram_aggregation(); + default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); } }(); From b71c7a8a99b0e257eb1f485e345904cf2313aac5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 17:33:34 -0700 Subject: [PATCH 043/100] Fix compiling issue --- cpp/src/reductions/hash_reduce_by_row.cuh | 2 +- cpp/src/reductions/histogram.cu | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh index 35654b90bc0..bc42c1473c1 100644 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ b/cpp/src/reductions/hash_reduce_by_row.cuh @@ -124,7 +124,7 @@ rmm::device_uvector hash_reduce_by_row( { auto const map_dview = map.get_device_view(); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); + auto const key_hasher = row_hasher.device_hasher(has_nulls); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto reduction_results = rmm::device_uvector(num_rows, stream, mr); diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 1665ef2bfea..72ed3026580 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -174,8 +174,8 @@ std::unique_ptr histogram(table_view const& input, auto map = cudf::detail::hash_map_type{ compute_hash_table_size(input.num_rows()), - cuco::empty_key{cudf::detail::COMPACTION_EMPTY_KEY_SENTINEL}, - cuco::empty_value{cudf::detail::COMPACTION_EMPTY_VALUE_SENTINEL}, + cuco::empty_key{-1}, + cuco::empty_value{std::numeric_limits::min()}, cudf::detail::hash_table_allocator_type{default_allocator{}, stream}, stream.value()}; @@ -185,9 +185,8 @@ std::unique_ptr histogram(table_view const& input, auto const has_nested_columns = cudf::detail::has_nested_columns(input); auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = - cudf::detail::experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); + auto const key_hasher = row_hasher.device_hasher(has_nulls); + auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); auto const pair_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); From 1edeb4ce90bb1eeef3691acd4a0b8f342924f4cb Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 13 Sep 2023 17:34:58 -0700 Subject: [PATCH 044/100] Remove header --- cpp/src/reductions/hash_reduce_by_row.cuh | 164 ---------------------- cpp/src/reductions/histogram.cu | 2 +- 2 files changed, 1 insertion(+), 165 deletions(-) delete mode 100644 cpp/src/reductions/hash_reduce_by_row.cuh diff --git a/cpp/src/reductions/hash_reduce_by_row.cuh b/cpp/src/reductions/hash_reduce_by_row.cuh deleted file mode 100644 index bc42c1473c1..00000000000 --- a/cpp/src/reductions/hash_reduce_by_row.cuh +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf::detail { - -/** - * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are - * rows that compared equal. - * - * TODO: We need to switch to use `static_reduction_map` when it is ready - * (https://github.com/NVIDIA/cuCollections/pull/98). - */ -template -struct reduce_by_row_fn_base { - protected: - MapView const d_map; - KeyHasher const d_hasher; - KeyEqual const d_equal; - OutputType* const d_output; - - reduce_by_row_fn_base(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - OutputType* const d_output) - : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, d_output{d_output} - { - } - - /** - * @brief Return a pointer to the output array at the given index. - * - * @param idx The access index - * @return A pointer to the given index in the output array - */ - __device__ OutputType* get_output_ptr(size_type const idx) const - { - auto const iter = d_map.find(idx, d_hasher, d_equal); - - if (iter != d_map.end()) { - // Only one (undetermined) index value of the duplicate rows could be inserted into the map. - // As such, looking up for all indices of duplicate rows always returns the same value. - auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); - - // All duplicate rows will have concurrent access to this same output slot. - return &d_output[inserted_idx]; - } else { - // All input `idx` values have been inserted into the map before. - // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if - // `d_equal(idx, idx) == false`. - // Such situations are due to comparing nulls or NaNs which are considered as always unequal. - // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct - // output slot. - return &d_output[idx]; - } - } -}; - -/** - * @brief Perform a reduction on groups of rows that are compared equal. - * - * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. - * - * At the beginning of the operation, the entire output array is filled with a value given by - * the `init` parameter. Then, the reduction result for each row group is written into the output - * array at the index of an unspecified row in the group. - * - * @tparam ReduceFuncBuilder The builder class that must have a `build()` method returning a - * reduction functor derived from `reduce_by_row_fn_base` - * @tparam OutputType Type of the reduction results - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons - * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param nans_equal Flag to specify whether NaN values in floating point column should be - * considered equal. - * @param init The initial value for reduction of each row group - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results - */ -template -rmm::device_uvector hash_reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - null_equality nulls_equal, - nan_equality nans_equal, - ReduceFuncBuilder func_builder, - OutputType init, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) -{ - auto const map_dview = map.get_device_view(); - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = row_hasher.device_hasher(has_nulls); - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - thrust::uninitialized_fill( - rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); - - auto const reduce_by_row = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); - } - }; - - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - reduce_by_row(nan_equal_comparator{}); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - reduce_by_row(nan_unequal_comparator{}); - } - - return reduction_results; -} - -} // namespace cudf::detail diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 72ed3026580..dc163b63d23 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -16,9 +16,9 @@ // #include -#include #include +#include #include #include #include From 75c35c4864fcdc56fb1fd19f97717635fc961363 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Fri, 15 Sep 2023 09:55:48 -0700 Subject: [PATCH 045/100] Change test types --- cpp/tests/reductions/reduction_tests.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index e61db56ad52..83bf834251b 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -295,7 +295,12 @@ TYPED_TEST(SumReductionTest, Sum) } #endif -TYPED_TEST_SUITE(ReductionTest, cudf::test::FloatingPointTypes); + +using TestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(ReductionTest, TestTypes); #if 0 TYPED_TEST(ReductionTest, Product) From c6c2c4336d834250cb0ed20a02cbe213679f5a95 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:33:58 -0700 Subject: [PATCH 046/100] Rewrite tests --- cpp/tests/reductions/reduction_tests.cpp | 114 ++++++++--------------- 1 file changed, 40 insertions(+), 74 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 83bf834251b..bb9db061602 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -390,7 +390,29 @@ TYPED_TEST(ReductionTest, SumOfSquare) #endif -TYPED_TEST(ReductionTest, Histogram) +template +struct ReductionHistogramTest : public cudf::test::BaseFixture {}; + +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes); + +auto histogram_reduction(cudf::column_view const& input, + std::unique_ptr const& agg) +{ + CUDF_EXPECTS( + agg->kind == cudf::aggregation::HISTOGRAM || agg->kind == cudf::aggregation::MERGE_HISTOGRAM, + "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); + + auto const result_scalar = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); + auto const result_col = dynamic_cast(result_scalar.get())->view(); + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + return std::move(cudf::gather(cudf::table_view{{result_col}}, *sort_order)->release().front()); +} + +TYPED_TEST(ReductionHistogramTest, Histogram) { using col_data = cudf::test::fixed_width_column_wrapper; using int64_data = cudf::test::fixed_width_column_wrapper; @@ -406,15 +428,8 @@ TYPED_TEST(ReductionTest, Histogram) auto child2 = int64_data{2, 1, 1, 2, 4, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test without nulls, sliced input. @@ -426,15 +441,8 @@ TYPED_TEST(ReductionTest, Histogram) auto child2 = int64_data{1, 1, 1, 3, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test with nulls. @@ -449,15 +457,8 @@ TYPED_TEST(ReductionTest, Histogram) auto child2 = int64_data{4, 2, 1, 1, 2, 4, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test with nulls, sliced input. @@ -473,19 +474,12 @@ TYPED_TEST(ReductionTest, Histogram) auto child2 = int64_data{2, 1, 1, 1, 3, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } } -TYPED_TEST(ReductionTest, MergeHistogram) +TYPED_TEST(ReductionHistogramTest, MergeHistogram) { using col_data = cudf::test::fixed_width_column_wrapper; using int64_data = cudf::test::fixed_width_column_wrapper; @@ -506,15 +500,8 @@ TYPED_TEST(ReductionTest, MergeHistogram) auto child2 = int64_data{5, 5, 4, 5, 8, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test without nulls, sliced input. @@ -531,15 +518,8 @@ TYPED_TEST(ReductionTest, MergeHistogram) auto child2 = int64_data{2, 4, 1, 5, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test with nulls. @@ -559,15 +539,8 @@ TYPED_TEST(ReductionTest, MergeHistogram) auto child2 = int64_data{67, 5, 5, 4, 5, 8, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } // Test with nulls, sliced input. @@ -588,15 +561,8 @@ TYPED_TEST(ReductionTest, MergeHistogram) auto child2 = int64_data{33, 2, 4, 1, 3, 1}; return structs_col{{child1, child2}}; }(); - auto const result = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result.get())->view(); - // cudf::test::print(result_col); - - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - auto const sorted_result = cudf::gather(cudf::table_view{{result_col}}, *sort_order); - // cudf::test::print(sorted_result->get_column(0).view()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, sorted_result->get_column(0).view()); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); } } From b5dd22a72eb6a91beb0ec6a7ec374315cf62abba Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:35:46 -0700 Subject: [PATCH 047/100] Misc --- cpp/tests/reductions/reduction_tests.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index bb9db061602..f6786a3a59c 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -393,6 +393,7 @@ TYPED_TEST(ReductionTest, SumOfSquare) template struct ReductionHistogramTest : public cudf::test::BaseFixture {}; +// Avoid unsigned types, as the tests below have negative values in their input. using HistogramTestTypes = cudf::test::Concat, cudf::test::FloatingPointTypes, cudf::test::FixedPointTypes, @@ -408,7 +409,9 @@ auto histogram_reduction(cudf::column_view const& input, auto const result_scalar = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); auto const result_col = dynamic_cast(result_scalar.get())->view(); - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); + + // Sort the histogram based on the first column (unique input values). + auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); return std::move(cudf::gather(cudf::table_view{{result_col}}, *sort_order)->release().front()); } From 17b8975e09c1457214ed4a5530a101043054a3d5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:36:29 -0700 Subject: [PATCH 048/100] Cleanup --- cpp/tests/reductions/reduction_tests.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index f6786a3a59c..d0287dc749a 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -120,7 +120,6 @@ struct ReductionTest : public cudf::test::BaseFixture { } }; -#if 0 template struct MinMaxReductionTest : public ReductionTest {}; @@ -294,15 +293,12 @@ TYPED_TEST(SumReductionTest, Sum) .second); } -#endif - using TestTypes = cudf::test::Concat, cudf::test::FloatingPointTypes, cudf::test::FixedPointTypes, cudf::test::ChronoTypes>; TYPED_TEST_SUITE(ReductionTest, TestTypes); -#if 0 TYPED_TEST(ReductionTest, Product) { using T = TypeParam; @@ -388,8 +384,6 @@ TYPED_TEST(ReductionTest, SumOfSquare) expected_null_value); } -#endif - template struct ReductionHistogramTest : public cudf::test::BaseFixture {}; @@ -569,7 +563,6 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) } } -#if 0 template struct ReductionAnyAllTest : public ReductionTest {}; using AnyAllTypes = cudf::test::Types; @@ -3127,5 +3120,5 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) *cudf::make_max_aggregation()); } } -#endif + CUDF_TEST_PROGRAM_MAIN() From c0b245f8057fd8e2e0b1b4235d392fd352b4768e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:37:34 -0700 Subject: [PATCH 049/100] Revert changes --- cpp/tests/reductions/reduction_tests.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index d0287dc749a..5665fdddfba 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -293,11 +293,7 @@ TYPED_TEST(SumReductionTest, Sum) .second); } -using TestTypes = cudf::test::Concat, - cudf::test::FloatingPointTypes, - cudf::test::FixedPointTypes, - cudf::test::ChronoTypes>; -TYPED_TEST_SUITE(ReductionTest, TestTypes); +TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) { From a8b3696652417279bb2e6c94acf45d18424b75e6 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:42:17 -0700 Subject: [PATCH 050/100] Add more assert statements --- cpp/tests/reductions/reduction_tests.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 5665fdddfba..f824695bbd5 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -398,7 +398,13 @@ auto histogram_reduction(cudf::column_view const& input, "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); auto const result_scalar = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64}); - auto const result_col = dynamic_cast(result_scalar.get())->view(); + EXPECT_EQ(result_scalar->is_valid(), true); + + auto const result_list_scalar = dynamic_cast(result_scalar.get()); + EXPECT_NE(result_list_scalar, nullptr); + + auto const result_col = result_list_scalar->view(); + EXPECT_EQ(result_col.num_children(), 2); // Sort the histogram based on the first column (unique input values). auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); From a7fee3082c57ae4dd42798ba0562df21b44e4d71 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:44:03 -0700 Subject: [PATCH 051/100] Clean up tests --- cpp/tests/reductions/reduction_tests.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index f824695bbd5..f23abf431ee 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -445,10 +445,9 @@ TYPED_TEST(ReductionHistogramTest, Histogram) } // Test with nulls. + using namespace cudf::test::iterators; + auto constexpr null{0}; { - using namespace cudf::test::iterators; - auto constexpr null{0}; - auto const input = col_data{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; auto const expected = [] { @@ -462,9 +461,6 @@ TYPED_TEST(ReductionHistogramTest, Histogram) // Test with nulls, sliced input. { - using namespace cudf::test::iterators; - auto constexpr null{0}; - auto const input_original = col_data{ {null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; auto const input = cudf::slice(input_original, {0, 9})[0]; @@ -522,10 +518,9 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) } // Test with nulls. + using namespace cudf::test::iterators; + auto constexpr null{0}; { - using namespace cudf::test::iterators; - auto constexpr null{0}; - auto const input = [] { auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, nulls_at({2, 5, 8, 11, 15})}; @@ -544,9 +539,6 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) // Test with nulls, sliced input. { - using namespace cudf::test::iterators; - auto constexpr null{0}; - auto const input_original = [] { auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, nulls_at({2, 5, 8, 11, 15})}; From 829017a3de7cb5285dbf522e67d32e5403a7b710 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 13:48:41 -0700 Subject: [PATCH 052/100] Add docs --- cpp/include/cudf/aggregation.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index b4491b68da2..75d693df9e0 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -618,8 +618,12 @@ template std::unique_ptr make_merge_m2_aggregation(); /** - * @brief make_merge_m2_aggregation - * @return + * @brief Factory to create a MERGE_HISTOGRAM aggregation + * + * Merges the results of `HISTOGRAM` aggregations on independent sets into a new `HISTOGRAM` value + * equivalent to if a single `HISTOGRAM` aggregation was done across all of the sets at once. + * + * @return A MERGE_HISTOGRAM aggregation object */ template std::unique_ptr make_merge_histogram_aggregation(); From e53042e1608046d22ee3785ef6965f8e8caa526f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 14:36:23 -0700 Subject: [PATCH 053/100] Rewrite docs --- .../reduction/detail/reduction_functions.hpp | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 804b79593da..9c85e754c2f 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -132,21 +132,33 @@ std::unique_ptr all(column_view const& col, rmm::mr::device_memory_resource* mr); /** - * @brief + * @brief Compute frequency for each unique element in the input column. * - * If all elements in input column are null, output scalar is null. + * The result histogram is stored in structs column having two children. The first child contains + * unique elements from the input, and the second child contains their corresponding frequencies. + * + * @throw cudf::logic_error if `output_dtype` is not integer type + * + * @param input The column to compute histogram + * @param output_dtype Data type to store the element frequencies + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return A list_scalar storing a structs column as the result histogram */ -std::unique_ptr histogram(column_view const& col, +std::unique_ptr histogram(column_view const& input, data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief + * @brief Merge multiple histograms together. * - * If all elements in input column are null, output scalar is null. + * @param input The input given as multiple histograms concatenated together + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned scalar's device memory + * @return A list_scalar storing the result histogram */ -std::unique_ptr merge_histogram(column_view const& col, +std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); From 49608ab8277aa4fa73acde4ecbd8657eed96b5c8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 15:54:34 -0700 Subject: [PATCH 054/100] Add a helper file --- cpp/src/reductions/histogram_helpers.hpp | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 cpp/src/reductions/histogram_helpers.hpp diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp new file mode 100644 index 00000000000..64386c023e4 --- /dev/null +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +#include + +namespace cudf::reduction::detail { + +/** + * @brief Compute the histogram for the input table. + * + * This is equivalent to do a distinct count for each unique rows in the input. + * + * @param input The input table to compute histogram + * @param partial_counts An optional column containing counts for each row + * @param output_dtype The output type to store the count value + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate memory of the returned objects + * @return A pair of array contains the indices of the distinct rows in the input table, and their + * corresponding distinct counts + */ +std::pair, std::unique_ptr> table_histogram( + table_view const& input, + std::optional const& partial_counts, + data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); + +} // namespace cudf::reduction::detail From 08aac0ea2a91dadbe888b5ecdd56aec399b5ba73 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 15:57:53 -0700 Subject: [PATCH 055/100] Rewrite histogram --- cpp/src/reductions/histogram.cu | 147 +++++++++++++++++--------------- 1 file changed, 79 insertions(+), 68 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index dc163b63d23..73343946792 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,13 +14,11 @@ * limitations under the License. */ -// #include +#include -#include - -#include #include #include +#include #include #include #include @@ -36,7 +34,7 @@ namespace cudf::reduction::detail { namespace { /** - * @brief The functor to compute the occurences of each unique rows in the input table. + * @brief The functor to accumulate the frequency of each distinct rows in the input table. */ template struct reduce_fn : cudf::detail::reduce_by_row_fn_base { @@ -58,12 +56,10 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base count(*this->get_output_ptr(idx)); - if (d_partial_output) { - count.fetch_add(d_partial_output[idx], cuda::std::memory_order_relaxed); - } else { - count.fetch_add(OutputType{1}, cuda::std::memory_order_relaxed); - } + auto const increment = d_partial_output ? d_partial_output[idx] : OutputType{1}; + auto const count = + cuda::atomic_ref(*this->get_output_ptr(idx)); + count.fetch_add(increment, cuda::std::memory_order_relaxed); } }; @@ -89,20 +85,31 @@ struct reduce_func_builder { } }; +/** + * @brief Specialized functor to check for non-zero. + * + * The input must be given as Pair. Only value of T2 is checked for non-zero. + */ struct is_none_zero { template - __device__ bool operator()(Pair const inp_pair) const + __device__ bool operator()(Pair const input) const { - return thrust::get<1>(inp_pair) != 0; + return thrust::get<1>(input) != 0; } }; +/** + * @brief Dispatcher functor to compute histogram in the given OutputType. + * + * The indices of distinct rows and their corresponding frequencies are written into two separate + * output buffer. + */ struct histogram_dispatcher { template static bool constexpr is_supported() { // Currently only int64_t is requested by Spark-Rapids. - // More data type can be supported by enabling it below. + // More data type (integer only) can be supported by enabling below. return std::is_same_v; } @@ -138,39 +145,55 @@ struct histogram_dispatcher { stream, rmm::mr::get_current_device_resource()); - column_view cv = column_view(data_type{type_id::INT64}, - (int)reduction_results.size(), - reduction_results.data(), - nullptr, - 0); - // printf("reduction result, num rows = %d\n", num_rows); - // cudf::test::print(cv); - auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); - auto const output_it = thrust::make_zip_iterator( thrust::make_tuple(output_indices, output_counts.begin())); + // Reduction results above are either group sizes of equal rows, or `0`. + // Thus, we need to extract the non-zero group sizes. thrust::copy_if( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, is_none_zero{}); - - // Reduction results are either group sizes of equal rows, or `0`. - // Thus, we only needs to extract the non-zero group sizes. } }; +auto gather_histogram(table_view const& input, + device_span distinct_indices, + std::unique_ptr&& distinct_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto distinct_rows = + std::move(cudf::detail::gather(input, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr) + ->release() + .front()); + + std::vector> struct_children; + struct_children.emplace_back(std::move(distinct_rows)); + struct_children.emplace_back(std::move(distinct_counts)); + auto output_structs = make_structs_column( + static_cast(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr); + + return std::make_unique( + std::move(*output_structs.release()), true, stream, mr); +} + } // namespace -std::unique_ptr histogram(table_view const& input, - std::optional const& partial_distinct_counts, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair, std::unique_ptr> table_histogram( + table_view const& input, + std::optional const& partial_counts, + data_type const output_dtype, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::is_integral(output_dtype) && - (cudf::size_of(output_dtype) == 4 || cudf::size_of(output_dtype) == 8), - "The output type of histogram aggregation must be an 32/64bit integral type."); + CUDF_EXPECTS(cudf::is_integral(output_dtype), + "The output type of histogram aggregation must be an integral type."); auto map = cudf::detail::hash_map_type{ compute_hash_table_size(input.num_rows()), @@ -191,9 +214,11 @@ std::unique_ptr histogram(table_view const& input, auto const pair_iter = cudf::detail::make_counting_transform_iterator( size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); }); + // Always compare NaNs as equal. using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; auto const value_comp = nan_equal_comparator{}; + if (has_nested_columns) { auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); @@ -202,14 +227,14 @@ std::unique_ptr histogram(table_view const& input, map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); } - // Gather the indices of distinct rows and distinct rows. + // Gather the indices of distinct rows. auto distinct_indices = rmm::device_uvector( static_cast(map.get_size()), stream, rmm::mr::get_current_device_resource()); - // map.retrieve_all(distinct_indices.begin(), thrust::make_discard_iterator(), stream.value()); - // Count the number of occurences of each unique row. + // Store the number of occurences of each distinct row. auto distinct_counts = make_numeric_column( output_dtype, static_cast(map.get_size()), mask_state::UNALLOCATED, stream, mr); + type_dispatcher(output_dtype, histogram_dispatcher{}, map, @@ -219,29 +244,10 @@ std::unique_ptr histogram(table_view const& input, has_nested_columns, distinct_indices.begin(), distinct_counts->mutable_view(), - partial_distinct_counts, + partial_counts, stream); - auto distinct_rows = - std::move(cudf::detail::gather(input, - distinct_indices, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release() - .front()); - // printf("reduction result 2\n"); - // cudf::test::print(distinct_counts->view()); - - std::vector> struct_children; - struct_children.emplace_back(std::move(distinct_rows)); - struct_children.emplace_back(std::move(distinct_counts)); - auto output_structs = make_structs_column( - static_cast(map.get_size()), std::move(struct_children), 0, {}, stream, mr); - - return std::make_unique( - std::move(*output_structs.release()), true, stream, mr); + return {std::move(distinct_indices), std::move(distinct_counts)}; } std::unique_ptr histogram(column_view const& input, @@ -249,26 +255,31 @@ std::unique_ptr histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return histogram(table_view{{input}}, std::nullopt, output_dtype, stream, mr); + auto const input_tv = table_view{{input}}; + auto [distinct_indices, distinct_counts] = + table_histogram(input_tv, std::nullopt, output_dtype, stream, mr); + return gather_histogram(input_tv, distinct_indices, std::move(distinct_counts), stream, mr); } std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); CUDF_EXPECTS( input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); - CUDF_EXPECTS(input.child(1).type().id() == type_id::INT64 && !input.child(1).has_nulls(), - "The second child of the input column must be INT64 type and has no nulls."); - CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), + "The second child of the input column must be ingegral type and has no nulls."); + + auto const structs_cv = structs_column_view{input}; + auto const input_values = structs_cv.get_sliced_child(0, stream); + auto const input_counts = structs_cv.get_sliced_child(1, stream); - auto const structs_cv = structs_column_view{input}; - return histogram(table_view{{structs_cv.get_sliced_child(0, stream)}}, - structs_cv.get_sliced_child(1, stream), - data_type{type_id::INT64}, - stream, - mr); + auto const values_tv = table_view{{input_values}}; + auto [distinct_indices, distinct_counts] = + table_histogram(values_tv, input_counts, data_type{type_id::INT64}, stream, mr); + return gather_histogram(values_tv, distinct_indices, std::move(distinct_counts), stream, mr); } } // namespace cudf::reduction::detail From aaaf3474c8028b92e6c84290c4eeb201cd7f96c8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 16:06:02 -0700 Subject: [PATCH 056/100] Add docs --- cpp/src/groupby/sort/aggregate.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index f59f2ab0271..7416f114671 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -546,6 +546,12 @@ void aggregate_result_functor::operator()(aggregation con get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr)); } +/** + * @brief Perform merging for multiple histograms that correspond to the same key value. + * + * The partial results input to this aggregation is a structs column that is (vertically) + * concatenated from multiple outputs of HISTOGRAM aggregations. + */ template <> void aggregate_result_functor::operator()(aggregation const& agg) { From 2f5b343f7cb223d58a4445f59211fb49118d77b0 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 16:07:42 -0700 Subject: [PATCH 057/100] Remove file --- cpp/src/reductions/histogram.cuh | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 cpp/src/reductions/histogram.cuh diff --git a/cpp/src/reductions/histogram.cuh b/cpp/src/reductions/histogram.cuh deleted file mode 100644 index 5951b91a964..00000000000 --- a/cpp/src/reductions/histogram.cuh +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace cudf::reduction::detail { - -} // namespace cudf::reduction::detail From c11f939d3824d1f99800f58a95ef34ad73727305 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 16:39:25 -0700 Subject: [PATCH 058/100] Rewrite docs --- cpp/src/groupby/sort/aggregate.cpp | 4 ++-- cpp/src/groupby/sort/group_reductions.hpp | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 7416f114671..10c271f76f9 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -549,8 +549,8 @@ void aggregate_result_functor::operator()(aggregation con /** * @brief Perform merging for multiple histograms that correspond to the same key value. * - * The partial results input to this aggregation is a structs column that is (vertically) - * concatenated from multiple outputs of HISTOGRAM aggregations. + * The partial results input to this aggregation is a structs column that is concatenated from + * multiple outputs of HISTOGRAM aggregations. */ template <> void aggregate_result_functor::operator()(aggregation const& agg) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 8acf046324b..6ff0d9df47a 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -217,12 +217,20 @@ std::unique_ptr group_count_all(cudf::device_span group rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); /** - * @brief + * @brief Internal API to compute histogram for each group in @p values. + * + * The returned column is a lists column, each list corresponds to one input group and stores the + * histogram of the distinct elements in that group in the form of `STRUCT`. * * @code{.pseudo} + * values = [2, 1, 1, 3, 5, 2, 2, 3, 1, 4] + * group_labels = [0, 0, 0, 1, 1, 1, 1, 1, 2, 2] + * num_groups = 3 + * + * output = [[<1, 2>, <2, 1>], [<2, 2>, <3, 2>, <5, 1>], [<1, 1>, <4, 1>]] * @endcode * - * @param values Grouped values to get valid count of + * @param values Grouped values to compute histogram * @param group_labels ID of group that the corresponding value belongs to * @param num_groups Number of groups ( unique values in @p group_labels ) * @param stream CUDA stream used for device memory operations and kernel launches. @@ -460,9 +468,17 @@ std::unique_ptr group_merge_m2(column_view const& values, rmm::mr::device_memory_resource* mr); /** - * @brief + * @brief Internal API to merge multiple output of HISTOGRAM aggregation. + * + * The input values column should be given as a structs column in the form of + * `STRUCT`. * * @code{.pseudo} + * values = [<1, 2>, <2, 1>, <2, 2>, <3, 2>, <2, 1>, <1, 1>, <2, 1>] + * group_labels = [0, 0, 0, 1, 1, 1, 1] + * num_groups = 2 + * + * output = [[<1, 2>, <2, 3>], [<1, 1>, <2, 2>, <3, 3>]]] * @endcode * * @param values Grouped values to get valid count of From d10842e5e2bef044686178b15251be4cbb7d03f8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 16:43:22 -0700 Subject: [PATCH 059/100] Change docs --- cpp/src/groupby/sort/group_reductions.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 6ff0d9df47a..24d631d6f51 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -222,6 +222,8 @@ std::unique_ptr group_count_all(cudf::device_span group * The returned column is a lists column, each list corresponds to one input group and stores the * histogram of the distinct elements in that group in the form of `STRUCT`. * + * Note that the order of distinct elements in each output list is not specified. + * * @code{.pseudo} * values = [2, 1, 1, 3, 5, 2, 2, 3, 1, 4] * group_labels = [0, 0, 0, 1, 1, 1, 1, 1, 2, 2] @@ -472,6 +474,7 @@ std::unique_ptr group_merge_m2(column_view const& values, * * The input values column should be given as a structs column in the form of * `STRUCT`. + * After merging, the order of distinct elements in each output list is not specified. * * @code{.pseudo} * values = [<1, 2>, <2, 1>, <2, 2>, <3, 2>, <2, 1>, <1, 1>, <2, 1>] From 6abc7b509336627dfe5ffe64d2b04b1087df6f9b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 21:50:40 -0700 Subject: [PATCH 060/100] Add headers --- cpp/src/reductions/histogram.cu | 1 + cpp/src/reductions/histogram_helpers.hpp | 1 + 2 files changed, 2 insertions(+) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 73343946792..5f6386fedba 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 64386c023e4..2a271ec70a5 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -21,6 +21,7 @@ #include #include +#include #include From f833f58162003a7b52975cea4c3b3a3309237f1c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 21:50:53 -0700 Subject: [PATCH 061/100] Implement groupby histogram and merge histogram aggs --- cpp/src/groupby/sort/group_histogram.cu | 167 +++++++++++++----------- 1 file changed, 92 insertions(+), 75 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 5123a9fb500..4cb9320f418 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -14,104 +14,121 @@ * limitations under the License. */ +#include +#include + #include #include -#include +#include +#include #include #include -#include -#include - -#include -#include -#include -#include -#include +#include namespace cudf::groupby::detail { -std::unique_ptr group_histogram(column_view const& values, + +// Fixed type for counting frequencies in historam. +// This is to avoid using `target_type_t` which requires type_dispatcher. +constexpr auto histogram_count_dtype = data_type{type_to_id()}; + +namespace { +auto make_empty_histogram(column_view const& values) +{ + std::vector> struct_children; + struct_children.emplace_back(empty_like(values)); + struct_children.emplace_back(make_numeric_column(histogram_count_dtype, 0)); + auto structs = std::make_unique(data_type{type_id::STRUCT}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(struct_children)); + + std::vector> lists_children; + lists_children.emplace_back(make_numeric_column(data_type{type_to_id()}, 0)); + lists_children.emplace_back(std::move(structs)); + return std::make_unique(cudf::data_type{type_id::LIST}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(lists_children)); +} + +std::unique_ptr group_histogram(column_view const& input, cudf::device_span group_labels, + std::optional const& partial_counts, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); - CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), + CUDF_EXPECTS(static_cast(input.size()) == group_labels.size(), "Size of values column should be same as that of group labels"); - auto result = make_numeric_column( - data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); - - if (num_groups == 0) { return result; } - - if (values.nullable()) { - auto values_view = column_device_view::create(values, stream); - - // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) - // so we need to transform it to cast it to an integer type - auto bitmask_iterator = - thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), - [] __device__(auto b) { return static_cast(b); }); - - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - bitmask_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); - } else { - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - thrust::make_constant_iterator(1), - thrust::make_discard_iterator(), - result->mutable_view().begin()); - } - - return result; + if (num_groups == 0) { return make_empty_histogram(input); } + + auto const labels_cv = column_view{data_type{type_to_id()}, + static_cast(group_labels.size()), + group_labels.data(), + nullptr, + 0}; + auto const labeled_values = table_view{{labels_cv, input}}; + + auto [distinct_indices, distinct_counts] = cudf::reduction::detail::table_histogram( + labeled_values, partial_counts, histogram_count_dtype, stream, mr); + auto out_table = cudf::detail::gather(labeled_values, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); + + auto out_offsets = cudf::lists::detail::reconstruct_offsets( + out_table->get_column(0).view(), num_groups, stream, mr); + + std::vector> struct_children; + struct_children.emplace_back(std::move(out_table->release().back())); + struct_children.emplace_back(std::move(distinct_counts)); + auto out_structs = make_structs_column( + static_cast(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr); + + return make_lists_column( + num_groups, std::move(out_offsets), std::move(out_structs), 0, {}, stream, mr); } -std::unique_ptr group_merge_histogram(column_view const& values, +} // namespace + +std::unique_ptr group_histogram(column_view const& input, + cudf::device_span group_labels, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + return group_histogram(input, group_labels, std::nullopt, num_groups, stream, mr); +} + +std::unique_ptr group_merge_histogram(column_view const& input, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); - CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), - "Size of values column should be same as that of group labels"); + CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS( + input.type().id() == type_id::STRUCT && input.num_children() == 2, + "The input of merge_histogram aggregation must be a struct column having two children."); + CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), + "The second child of the input column must be ingegral type and has no nulls."); + + if (num_groups == 0) { return empty_like(input); } + + auto const structs_cv = structs_column_view{input}; + auto const input_values = structs_cv.get_sliced_child(0, stream); + auto const input_counts = structs_cv.get_sliced_child(1, stream); - auto result = make_numeric_column( - data_type(type_to_id()), num_groups, mask_state::UNALLOCATED, stream, mr); - - if (num_groups == 0) { return result; } - - if (values.nullable()) { - auto values_view = column_device_view::create(values, stream); - - // make_validity_iterator returns a boolean iterator that sums to 1 (1+1=1) - // so we need to transform it to cast it to an integer type - auto bitmask_iterator = - thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), - [] __device__(auto b) { return static_cast(b); }); - - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - bitmask_iterator, - thrust::make_discard_iterator(), - result->mutable_view().begin()); - } else { - thrust::reduce_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - thrust::make_constant_iterator(1), - thrust::make_discard_iterator(), - result->mutable_view().begin()); - } - - return result; + return group_histogram(input_values, group_labels, input_counts, num_groups, stream, mr); } } // namespace cudf::groupby::detail From ef308e8c26ad8f3baabe8a3bd8b71482e2da444e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 21:53:26 -0700 Subject: [PATCH 062/100] Update header copyright --- cpp/src/groupby/sort/group_reductions.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 24d631d6f51..c1d42987906 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 70e624d4d355504c26f2789ae297f72cdb0b08ff Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 18 Sep 2023 21:53:32 -0700 Subject: [PATCH 063/100] Rename function --- cpp/src/groupby/sort/group_histogram.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 4cb9320f418..78b1db23700 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -56,12 +56,12 @@ auto make_empty_histogram(column_view const& values) std::move(lists_children)); } -std::unique_ptr group_histogram(column_view const& input, - cudf::device_span group_labels, - std::optional const& partial_counts, - size_type num_groups, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr histogram(column_view const& input, + cudf::device_span group_labels, + std::optional const& partial_counts, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); CUDF_EXPECTS(static_cast(input.size()) == group_labels.size(), @@ -106,7 +106,7 @@ std::unique_ptr group_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return group_histogram(input, group_labels, std::nullopt, num_groups, stream, mr); + return histogram(input, group_labels, std::nullopt, num_groups, stream, mr); } std::unique_ptr group_merge_histogram(column_view const& input, @@ -128,7 +128,7 @@ std::unique_ptr group_merge_histogram(column_view const& input, auto const input_values = structs_cv.get_sliced_child(0, stream); auto const input_counts = structs_cv.get_sliced_child(1, stream); - return group_histogram(input_values, group_labels, input_counts, num_groups, stream, mr); + return histogram(input_values, group_labels, input_counts, num_groups, stream, mr); } } // namespace cudf::groupby::detail From ee91b2e3d5018056528c97a5301a6fa975ca57f5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 06:35:42 -0700 Subject: [PATCH 064/100] Fix typos --- cpp/src/groupby/sort/group_histogram.cu | 2 +- cpp/src/reductions/histogram.cu | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 78b1db23700..afb763a021b 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -120,7 +120,7 @@ std::unique_ptr group_merge_histogram(column_view const& input, input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), - "The second child of the input column must be ingegral type and has no nulls."); + "The second child of the input column must be integral type and has no nulls."); if (num_groups == 0) { return empty_like(input); } diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 5f6386fedba..046068b3f0d 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -232,7 +232,7 @@ std::pair, std::unique_ptr> table_histogr auto distinct_indices = rmm::device_uvector( static_cast(map.get_size()), stream, rmm::mr::get_current_device_resource()); - // Store the number of occurences of each distinct row. + // Store the number of occurrences of each distinct row. auto distinct_counts = make_numeric_column( output_dtype, static_cast(map.get_size()), mask_state::UNALLOCATED, stream, mr); @@ -271,7 +271,7 @@ std::unique_ptr merge_histogram(column_view const& input, input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), - "The second child of the input column must be ingegral type and has no nulls."); + "The second child of the input column must be integral type and has no nulls."); auto const structs_cv = structs_column_view{input}; auto const input_values = structs_cv.get_sliced_child(0, stream); From 7c51faa0cb6931b2efe734fb4558a7cd80ef32ac Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 09:25:50 -0700 Subject: [PATCH 065/100] Add file --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/groupby/histogram_tests.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 cpp/tests/groupby/histogram_tests.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a69dc9bf2f8..9645f322f81 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -116,6 +116,7 @@ ConfigureTest( groupby/covariance_tests.cpp groupby/groupby_test_util.cpp groupby/groups_tests.cpp + groupby/histogram_tests.cpp groupby/keys_tests.cpp groupby/lists_tests.cpp groupby/m2_tests.cpp diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp new file mode 100644 index 00000000000..761a2abacae --- /dev/null +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include From 270bcb8493271c2992295b53e31d49ea627327bf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 10:01:09 -0700 Subject: [PATCH 066/100] Add docs --- cpp/src/groupby/sort/group_histogram.cu | 43 ++++++++++++++---------- cpp/src/reductions/histogram_helpers.hpp | 4 +-- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index afb763a021b..e042d88f837 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -56,28 +56,32 @@ auto make_empty_histogram(column_view const& values) std::move(lists_children)); } -std::unique_ptr histogram(column_view const& input, - cudf::device_span group_labels, - std::optional const& partial_counts, - size_type num_groups, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::unique_ptr build_histogram(column_view const& values, + cudf::device_span group_labels, + std::optional const& partial_counts, + size_type num_groups, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); - CUDF_EXPECTS(static_cast(input.size()) == group_labels.size(), + CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), "Size of values column should be same as that of group labels"); - if (num_groups == 0) { return make_empty_histogram(input); } + if (num_groups == 0) { return make_empty_histogram(values); } + // Attach group labels to the input values. auto const labels_cv = column_view{data_type{type_to_id()}, static_cast(group_labels.size()), group_labels.data(), nullptr, 0}; - auto const labeled_values = table_view{{labels_cv, input}}; + auto const labeled_values = table_view{{labels_cv, values}}; + // Build histogram for the labeled values. auto [distinct_indices, distinct_counts] = cudf::reduction::detail::table_histogram( labeled_values, partial_counts, histogram_count_dtype, stream, mr); + + // Gather the distinct rows for output histogram. auto out_table = cudf::detail::gather(labeled_values, distinct_indices, out_of_bounds_policy::DONT_CHECK, @@ -85,6 +89,8 @@ std::unique_ptr histogram(column_view const& input, stream, mr); + // Build offsets for the output lists column. + // Each list will be a histogram corresponding to each value group. auto out_offsets = cudf::lists::detail::reconstruct_offsets( out_table->get_column(0).view(), num_groups, stream, mr); @@ -100,35 +106,36 @@ std::unique_ptr histogram(column_view const& input, } // namespace -std::unique_ptr group_histogram(column_view const& input, +std::unique_ptr group_histogram(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return histogram(input, group_labels, std::nullopt, num_groups, stream, mr); + return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr); } -std::unique_ptr group_merge_histogram(column_view const& input, +std::unique_ptr group_merge_histogram(column_view const& values, cudf::device_span group_labels, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls."); CUDF_EXPECTS( - input.type().id() == type_id::STRUCT && input.num_children() == 2, + values.type().id() == type_id::STRUCT && values.num_children() == 2, "The input of merge_histogram aggregation must be a struct column having two children."); - CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), + CUDF_EXPECTS(cudf::is_integral(values.child(1).type()) && !values.child(1).has_nulls(), "The second child of the input column must be integral type and has no nulls."); - if (num_groups == 0) { return empty_like(input); } + if (num_groups == 0) { return empty_like(values); } - auto const structs_cv = structs_column_view{input}; + // The input values column is already in histogram format (i.e., column of Struct). + auto const structs_cv = structs_column_view{values}; auto const input_values = structs_cv.get_sliced_child(0, stream); auto const input_counts = structs_cv.get_sliced_child(1, stream); - return histogram(input_values, group_labels, input_counts, num_groups, stream, mr); + return build_histogram(input_values, group_labels, input_counts, num_groups, stream, mr); } } // namespace cudf::groupby::detail diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 2a271ec70a5..3d3d548cc00 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -37,8 +37,8 @@ namespace cudf::reduction::detail { * @param output_dtype The output type to store the count value * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate memory of the returned objects - * @return A pair of array contains the indices of the distinct rows in the input table, and their - * corresponding distinct counts + * @return A pair of array contains the (stable-order) indices of the distinct rows in the input + * table, and their corresponding distinct counts */ std::pair, std::unique_ptr> table_histogram( table_view const& input, From 6447877645a69acb157fc0c56072f19f0a96acbd Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 11:01:16 -0700 Subject: [PATCH 067/100] Add empty tests --- cpp/tests/reductions/reduction_tests.cpp | 29 +++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index f23abf431ee..34e1011d294 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -419,7 +419,18 @@ TYPED_TEST(ReductionHistogramTest, Histogram) auto const agg = cudf::make_histogram_aggregation(); - // Test without nulls. + // Empty input. + { + auto const input = col_data{}; + auto const expected = [] { + auto child1 = col_data{}; + auto child2 = int64_data{}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + { auto const input = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; auto const expected = [] { @@ -482,6 +493,22 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) auto const agg = cudf::make_merge_histogram_aggregation(); + // Empty input. + { + auto const input = [] { + auto child1 = col_data{}; + auto child2 = int64_data{}; + return structs_col{{child1, child2}}; + }(); + auto const expected = [] { + auto child1 = col_data{}; + auto child2 = int64_data{}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + // Test without nulls. { auto const input = [] { From c766e4354118ffa16f1f39af0050f0d3cdc66f7d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 12:13:01 -0700 Subject: [PATCH 068/100] Implement histogram tests --- cpp/tests/groupby/histogram_tests.cpp | 51 +++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 761a2abacae..0bcda05667a 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -21,4 +21,55 @@ #include #include +#include #include +#include +#include + +template +struct GroupbyHistogramTest : public cudf::test::BaseFixture {}; + +template +struct GroupbyMergeHistogramTest : public cudf::test::BaseFixture {}; + +// Avoid unsigned types, as the tests below have negative values in their input. +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(GroupbyHistogramTest, HistogramTestTypes); +TYPED_TEST_SUITE(GroupbyMergeHistogramTest, HistogramTestTypes); + +auto groupby_histogram(cudf::column_view const& keys, + cudf::column_view const& values, + std::unique_ptr&& agg) +{ + CUDF_EXPECTS( + agg->kind == cudf::aggregation::HISTOGRAM || agg->kind == cudf::aggregation::MERGE_HISTOGRAM, + "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); + + std::vector requests; + requests.emplace_back(cudf::groupby::aggregation_request()); + requests[0].values = values; + requests[0].aggregations.push_back(std::move(agg)); + + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); + auto const agg_results = gb_obj.aggregate(requests, cudf::test::get_default_stream()); + auto const agg_histogram = agg_results.second[0].results[0]->view(); + EXPECT_NE(agg_histogram.type().id(), cudf::type_id::LIST); + EXPECT_EQ(agg_histogram.num_children(), 2); + + auto const key_sort_order = cudf::sorted_order(agg_results.first->view(), {}, {}); + auto sorted_keys = + std::move(cudf::gather(agg_results.first->view(), *key_sort_order)->release().front()); + auto const sorted_vals = std::move( + cudf::gather(cudf::table_view({agg_results.second[0].results[0]->view()}), *key_sort_order) + ->release() + .front()); + auto sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals}, + cudf::order::ASCENDING, + cudf::null_order::BEFORE, + rmm::mr::get_current_device_resource()); + + return std::pair{std::move(sorted_keys), std::move(sorted_histograms)}; +} From baddf18786d37bfad1b41fb0bdb7b2dc666d0ac2 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 20:22:37 -0700 Subject: [PATCH 069/100] Move tests --- cpp/tests/reductions/reduction_tests.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 34e1011d294..9ee7527b0f5 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -380,16 +380,6 @@ TYPED_TEST(ReductionTest, SumOfSquare) expected_null_value); } -template -struct ReductionHistogramTest : public cudf::test::BaseFixture {}; - -// Avoid unsigned types, as the tests below have negative values in their input. -using HistogramTestTypes = cudf::test::Concat, - cudf::test::FloatingPointTypes, - cudf::test::FixedPointTypes, - cudf::test::ChronoTypes>; -TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes); - auto histogram_reduction(cudf::column_view const& input, std::unique_ptr const& agg) { @@ -411,6 +401,16 @@ auto histogram_reduction(cudf::column_view const& input, return std::move(cudf::gather(cudf::table_view{{result_col}}, *sort_order)->release().front()); } +template +struct ReductionHistogramTest : public cudf::test::BaseFixture {}; + +// Avoid unsigned types, as the tests below have negative values in their input. +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes); + TYPED_TEST(ReductionHistogramTest, Histogram) { using col_data = cudf::test::fixed_width_column_wrapper; From 0afad9cc94d272c6a85d52ad8f181f40162e791e Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 20:24:15 -0700 Subject: [PATCH 070/100] Rename alias --- cpp/tests/reductions/reduction_tests.cpp | 82 ++++++++++++------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 9ee7527b0f5..74bd39e11ea 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -413,18 +413,18 @@ TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes); TYPED_TEST(ReductionHistogramTest, Histogram) { - using col_data = cudf::test::fixed_width_column_wrapper; - using int64_data = cudf::test::fixed_width_column_wrapper; + using data_col = cudf::test::fixed_width_column_wrapper; + using int64_col = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; auto const agg = cudf::make_histogram_aggregation(); // Empty input. { - auto const input = col_data{}; + auto const input = data_col{}; auto const expected = [] { - auto child1 = col_data{}; - auto child2 = int64_data{}; + auto child1 = data_col{}; + auto child2 = int64_col{}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -432,10 +432,10 @@ TYPED_TEST(ReductionHistogramTest, Histogram) } { - auto const input = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const input = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; auto const expected = [] { - auto child1 = col_data{-3, -2, 0, 1, 2, 5}; - auto child2 = int64_data{2, 1, 1, 2, 4, 1}; + auto child1 = data_col{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -444,11 +444,11 @@ TYPED_TEST(ReductionHistogramTest, Histogram) // Test without nulls, sliced input. { - auto const input_original = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto const input_original = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; auto const input = cudf::slice(input_original, {0, 7})[0]; auto const expected = [] { - auto child1 = col_data{-3, 0, 1, 2, 5}; - auto child2 = int64_data{1, 1, 1, 3, 1}; + auto child1 = data_col{-3, 0, 1, 2, 5}; + auto child2 = int64_col{1, 1, 1, 3, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -459,11 +459,11 @@ TYPED_TEST(ReductionHistogramTest, Histogram) using namespace cudf::test::iterators; auto constexpr null{0}; { - auto const input = col_data{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, + auto const input = data_col{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; auto const expected = [] { - auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64_data{4, 2, 1, 1, 2, 4, 1}; + auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{4, 2, 1, 1, 2, 4, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -472,12 +472,12 @@ TYPED_TEST(ReductionHistogramTest, Histogram) // Test with nulls, sliced input. { - auto const input_original = col_data{ + auto const input_original = data_col{ {null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})}; auto const input = cudf::slice(input_original, {0, 9})[0]; auto const expected = [] { - auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64_data{2, 1, 1, 1, 3, 1}; + auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{2, 1, 1, 1, 3, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -487,8 +487,8 @@ TYPED_TEST(ReductionHistogramTest, Histogram) TYPED_TEST(ReductionHistogramTest, MergeHistogram) { - using col_data = cudf::test::fixed_width_column_wrapper; - using int64_data = cudf::test::fixed_width_column_wrapper; + using data_col = cudf::test::fixed_width_column_wrapper; + using int64_col = cudf::test::fixed_width_column_wrapper; using structs_col = cudf::test::structs_column_wrapper; auto const agg = cudf::make_merge_histogram_aggregation(); @@ -496,13 +496,13 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) // Empty input. { auto const input = [] { - auto child1 = col_data{}; - auto child2 = int64_data{}; + auto child1 = data_col{}; + auto child2 = int64_col{}; return structs_col{{child1, child2}}; }(); auto const expected = [] { - auto child1 = col_data{}; - auto child2 = int64_data{}; + auto child1 = data_col{}; + auto child2 = int64_col{}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -512,14 +512,14 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) // Test without nulls. { auto const input = [] { - auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; - auto child2 = int64_data{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; return structs_col{{child1, child2}}; }(); auto const expected = [] { - auto child1 = col_data{-3, -2, 0, 1, 2, 5}; - auto child2 = int64_data{5, 5, 4, 5, 8, 1}; + auto child1 = data_col{-3, -2, 0, 1, 2, 5}; + auto child2 = int64_col{5, 5, 4, 5, 8, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -529,15 +529,15 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) // Test without nulls, sliced input. { auto const input_original = [] { - auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; - auto child2 = int64_data{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; return structs_col{{child1, child2}}; }(); auto const input = cudf::slice(input_original, {0, 7})[0]; auto const expected = [] { - auto child1 = col_data{-3, 0, 1, 2, 5}; - auto child2 = int64_data{2, 4, 1, 5, 1}; + auto child1 = data_col{-3, 0, 1, 2, 5}; + auto child2 = int64_col{2, 4, 1, 5, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -549,15 +549,15 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) auto constexpr null{0}; { auto const input = [] { - auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, nulls_at({2, 5, 8, 11, 15})}; - auto child2 = int64_data{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; return structs_col{{child1, child2}}; }(); auto const expected = [] { - auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64_data{67, 5, 5, 4, 5, 8, 1}; + auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{67, 5, 5, 4, 5, 8, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -567,16 +567,16 @@ TYPED_TEST(ReductionHistogramTest, MergeHistogram) // Test with nulls, sliced input. { auto const input_original = [] { - auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, nulls_at({2, 5, 8, 11, 15})}; - auto child2 = int64_data{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; return structs_col{{child1, child2}}; }(); auto const input = cudf::slice(input_original, {0, 9})[0]; auto const expected = [] { - auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64_data{33, 2, 4, 1, 3, 1}; + auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64_col{33, 2, 4, 1, 3, 1}; return structs_col{{child1, child2}}; }(); auto const result = histogram_reduction(input, agg); @@ -1089,10 +1089,10 @@ TEST_F(ReductionEmptyTest, empty_column) // test if null count is equal or greater than size of input // expect result.is_valid() is false int col_size = 5; - std::vector col_data(col_size); + std::vector data_col(col_size); std::vector valids(col_size, 0); - cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(col_data, valids); + cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(data_col, valids); CUDF_EXPECT_NO_THROW(statement(col_nulls)); auto any_agg = cudf::make_any_aggregation(); From c05e5956d74c9fa546f75cf774cb060015f939ed Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 21:47:54 -0700 Subject: [PATCH 071/100] Add target types --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 930ec992384..784f05a964e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1192,6 +1192,12 @@ struct target_type_impl { using type = size_type; }; +// Use list for HISTOGRAM +template +struct target_type_impl { + using type = list_view; +}; + // Computing ANY of any type, use bool accumulator template struct target_type_impl { @@ -1370,6 +1376,12 @@ struct target_type_impl { using type = struct_view; }; +// Use list for MERGE_HISTOGRAM +template +struct target_type_impl { + using type = list_view; +}; + // Always use double for COVARIANCE template struct target_type_impl { From 86530538a55d6219ea3e8c5c18b8fa4a969f16a1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 21:54:29 -0700 Subject: [PATCH 072/100] Add empty return --- cpp/src/groupby/groupby.cu | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index ce1fc71968f..ec200b52bbd 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -110,6 +110,20 @@ struct empty_column_constructor { 0, make_empty_column(type_to_id()), empty_like(values), 0, {}); } + if constexpr (k == aggregation::Kind::HISTOGRAM || k == aggregation::Kind::MERGE_HISTOGRAM) { + std::vector> struct_children; + struct_children.emplace_back(empty_like(values)); + struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); + auto structs = std::make_unique(data_type{type_id::STRUCT}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(struct_children)); + return make_lists_column( + 0, make_empty_column(type_to_id()), std::move(structs), 0, {}); + } + if constexpr (k == aggregation::Kind::RANK) { auto const& rank_agg = dynamic_cast(agg); if (rank_agg._method == cudf::rank_method::AVERAGE or From 8d6fdfee9255a79245db4d3fb05ed0f689c5eb3a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 19 Sep 2023 21:55:19 -0700 Subject: [PATCH 073/100] MISC --- cpp/src/groupby/sort/group_histogram.cu | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index e042d88f837..63e6952c3b5 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -44,16 +44,8 @@ auto make_empty_histogram(column_view const& values) rmm::device_buffer{}, 0, std::move(struct_children)); - - std::vector> lists_children; - lists_children.emplace_back(make_numeric_column(data_type{type_to_id()}, 0)); - lists_children.emplace_back(std::move(structs)); - return std::make_unique(cudf::data_type{type_id::LIST}, - 0, - rmm::device_buffer{}, - rmm::device_buffer{}, - 0, - std::move(lists_children)); + return make_lists_column( + 0, make_empty_column(type_to_id()), std::move(structs), 0, {}); } std::unique_ptr build_histogram(column_view const& values, From d1fbda44fe501f5e2106db23491620dbb2d49da1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 09:55:39 -0700 Subject: [PATCH 074/100] Add more assertions --- cpp/tests/reductions/reduction_tests.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 74bd39e11ea..ed85da9e50b 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -393,12 +393,14 @@ auto histogram_reduction(cudf::column_view const& input, auto const result_list_scalar = dynamic_cast(result_scalar.get()); EXPECT_NE(result_list_scalar, nullptr); - auto const result_col = result_list_scalar->view(); - EXPECT_EQ(result_col.num_children(), 2); + auto const histogram = result_list_scalar->view(); + EXPECT_EQ(histogram.num_children(), 2); + EXPECT_EQ(histogram.null_count(), 0); + EXPECT_EQ(histogram.child(1).null_count(), 0); // Sort the histogram based on the first column (unique input values). - auto const sort_order = cudf::sorted_order(cudf::table_view{{result_col.child(0)}}, {}, {}); - return std::move(cudf::gather(cudf::table_view{{result_col}}, *sort_order)->release().front()); + auto const sort_order = cudf::sorted_order(cudf::table_view{{histogram.child(0)}}, {}, {}); + return std::move(cudf::gather(cudf::table_view{{histogram}}, *sort_order)->release().front()); } template From 199d97b0019b6cf2ee5a6a51beb7b10db39821b5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 10:09:02 -0700 Subject: [PATCH 075/100] Implement unit tests for groupby histogram --- cpp/tests/groupby/histogram_tests.cpp | 291 ++++++++++++++++++++++++-- 1 file changed, 269 insertions(+), 22 deletions(-) diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 0bcda05667a..3ea0c07b68a 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -26,46 +26,45 @@ #include #include -template -struct GroupbyHistogramTest : public cudf::test::BaseFixture {}; - -template -struct GroupbyMergeHistogramTest : public cudf::test::BaseFixture {}; - -// Avoid unsigned types, as the tests below have negative values in their input. -using HistogramTestTypes = cudf::test::Concat, - cudf::test::FloatingPointTypes, - cudf::test::FixedPointTypes, - cudf::test::ChronoTypes>; -TYPED_TEST_SUITE(GroupbyHistogramTest, HistogramTestTypes); -TYPED_TEST_SUITE(GroupbyMergeHistogramTest, HistogramTestTypes); +using int32s_col = cudf::test::fixed_width_column_wrapper; +using int64s_col = cudf::test::fixed_width_column_wrapper; +using structs_col = cudf::test::structs_column_wrapper; auto groupby_histogram(cudf::column_view const& keys, cudf::column_view const& values, - std::unique_ptr&& agg) + cudf::aggregation::Kind agg_kind) { CUDF_EXPECTS( - agg->kind == cudf::aggregation::HISTOGRAM || agg->kind == cudf::aggregation::MERGE_HISTOGRAM, + agg_kind == cudf::aggregation::HISTOGRAM || agg_kind == cudf::aggregation::MERGE_HISTOGRAM, "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); std::vector requests; requests.emplace_back(cudf::groupby::aggregation_request()); requests[0].values = values; - requests[0].aggregations.push_back(std::move(agg)); + if (agg_kind == cudf::aggregation::HISTOGRAM) { + requests[0].aggregations.push_back( + cudf::make_histogram_aggregation()); + } else { + requests[0].aggregations.push_back( + cudf::make_merge_histogram_aggregation()); + } auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys})); auto const agg_results = gb_obj.aggregate(requests, cudf::test::get_default_stream()); auto const agg_histogram = agg_results.second[0].results[0]->view(); - EXPECT_NE(agg_histogram.type().id(), cudf::type_id::LIST); - EXPECT_EQ(agg_histogram.num_children(), 2); + EXPECT_EQ(agg_histogram.type().id(), cudf::type_id::LIST); + EXPECT_EQ(agg_histogram.null_count(), 0); + + auto const histograms = cudf::lists_column_view{agg_histogram}.child(); + EXPECT_EQ(histograms.num_children(), 2); + EXPECT_EQ(histograms.null_count(), 0); + EXPECT_EQ(histograms.child(1).null_count(), 0); auto const key_sort_order = cudf::sorted_order(agg_results.first->view(), {}, {}); auto sorted_keys = std::move(cudf::gather(agg_results.first->view(), *key_sort_order)->release().front()); - auto const sorted_vals = std::move( - cudf::gather(cudf::table_view({agg_results.second[0].results[0]->view()}), *key_sort_order) - ->release() - .front()); + auto const sorted_vals = + std::move(cudf::gather(cudf::table_view{{agg_histogram}}, *key_sort_order)->release().front()); auto sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals}, cudf::order::ASCENDING, cudf::null_order::BEFORE, @@ -73,3 +72,251 @@ auto groupby_histogram(cudf::column_view const& keys, return std::pair{std::move(sorted_keys), std::move(sorted_histograms)}; } + +template +struct GroupbyHistogramTest : public cudf::test::BaseFixture {}; + +template +struct GroupbyMergeHistogramTest : public cudf::test::BaseFixture {}; + +// Avoid unsigned types, as the tests below have negative values in their input. +using HistogramTestTypes = cudf::test::Concat, + cudf::test::FloatingPointTypes, + cudf::test::FixedPointTypes, + cudf::test::ChronoTypes>; +TYPED_TEST_SUITE(GroupbyHistogramTest, HistogramTestTypes); +TYPED_TEST_SUITE(GroupbyMergeHistogramTest, HistogramTestTypes); + +TYPED_TEST(GroupbyHistogramTest, EmptyInput) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + auto const keys = int32s_col{}; + auto const values = col_data{}; + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + + // The structure of the output is already verified in the function `groupby_histogram`. + ASSERT_EQ(res_histogram->size(), 0); +} + +TYPED_TEST(GroupbyHistogramTest, SimpleInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: values = [2, 2, -3, -2, 2] + // key = 1: values = [2, 0, 5, 2, 1] + // key = 2: values = [-3, 1, 1, 2, 2] + auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2}; + auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, SlicedInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + auto const keys_original = int32s_col{2, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values_original = + col_data{1, 2, 0, 2, 1, -3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + + // key = 0: values = [2, 2, -3, -2, 2] + // key = 1: values = [2, 0, 5, 2, 1] + // key = 2: values = [-3, 1, 1, 2, 2] + auto const keys = cudf::slice(keys_original, {5, 20})[0]; + auto const values = cudf::slice(values_original, {5, 20})[0]; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2}; + auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, InputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + // key = 0: values = [-3, null, 2, null, 2] + // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1] + // key = 2: values = [null, 2, 0, -2, 2, null, 2] + auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values = + col_data{{null, -3, 2, 1, 2, null, 0, 5, 2, null, -3, -2, 2, null, 1, 2, null, 1, 2, 2}, + nulls_at({0, 5, 9, 13, 16})}; + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, nulls_at({0, 3, 8})}; + auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyHistogramTest, SlicedInputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + auto const keys_original = + int32s_col{1, 0, 2, 2, 0, 2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2, 0, 1, 2}; + auto const values_original = + col_data{{null, 1, 1, 2, 1, null, -3, 2, 1, 2, null, 0, 5, 2, + null, -3, -2, 2, null, 1, 2, null, 1, 2, 2, null, 1, 2}, + nulls_at({0, 5, 10, 14, 18, 21, 25})}; + + // key = 0: values = [-3, null, 2, null, 2] + // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1] + // key = 2: values = [null, 2, 0, -2, 2, null, 2] + auto const keys = cudf::slice(keys_original, {5, 25})[0]; + auto const values = cudf::slice(values_original, {5, 25})[0]; + + auto const expected_keys = int32s_col{0, 1, 2}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, nulls_at({0, 3, 8})}; + auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +#if 0 +TYPED_TEST(GroupbyMergeHistogramTest, MergeHistogram) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using int64s_col = cudf::test::fixed_width_column_wrapper; + using structs_col = cudf::test::structs_column_wrapper; + + auto const agg = cudf::make_merge_histogram_aggregation(); + + // Empty input. + { + auto const input = [] { + auto child1 = col_data{}; + auto child2 = int64s_col{}; + return structs_col{{child1, child2}}; + }(); + auto const expected = [] { + auto child1 = col_data{}; + auto child2 = int64s_col{}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test without nulls. + { + auto const input = [] { + auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64s_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = col_data{-3, -2, 0, 1, 2, 5}; + auto child2 = int64s_col{5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test without nulls, sliced input. + { + auto const input_original = [] { + auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; + auto child2 = int64s_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 7})[0]; + + auto const expected = [] { + auto child1 = col_data{-3, 0, 1, 2, 5}; + auto child2 = int64s_col{2, 4, 1, 5, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls. + using namespace cudf::test::iterators; + auto constexpr null{0}; + { + auto const input = [] { + auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64s_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + + auto const expected = [] { + auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64s_col{67, 5, 5, 4, 5, 8, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with nulls, sliced input. + { + auto const input_original = [] { + auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, + nulls_at({2, 5, 8, 11, 15})}; + auto child2 = int64s_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; + return structs_col{{child1, child2}}; + }(); + auto const input = cudf::slice(input_original, {0, 9})[0]; + + auto const expected = [] { + auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; + auto child2 = int64s_col{33, 2, 4, 1, 3, 1}; + return structs_col{{child1, child2}}; + }(); + auto const result = histogram_reduction(input, agg); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + +#endif From 4b0983e810c1427868100be532cd8cb32b5b0d66 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 11:34:59 -0700 Subject: [PATCH 076/100] Reimplement merge histogram --- cpp/src/groupby/sort/group_histogram.cu | 44 ++++++++++++++++++----- cpp/src/groupby/sort/group_reductions.hpp | 4 +-- cpp/src/reductions/histogram.cu | 5 ++- 3 files changed, 39 insertions(+), 14 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 63e6952c3b5..88724c54112 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -20,12 +20,15 @@ #include #include #include +#include #include #include #include #include +#include + namespace cudf::groupby::detail { // Fixed type for counting frequencies in historam. @@ -55,9 +58,9 @@ std::unique_ptr build_histogram(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(num_groups >= 0, "number of groups cannot be negative"); + CUDF_EXPECTS(num_groups >= 0, "Number of groups cannot be negative."); CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), - "Size of values column should be same as that of group labels"); + "Size of values column should be same as that of group labels."); if (num_groups == 0) { return make_empty_histogram(values); } @@ -108,26 +111,49 @@ std::unique_ptr group_histogram(column_view const& values, } std::unique_ptr group_merge_histogram(column_view const& values, - cudf::device_span group_labels, + cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // The input must be a lists column without nulls. CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS(values.type().id() == type_id::LIST, + "The input of MERGE_HISTOGRAM aggregation must be a lists column."); + + // Child of the input lists column must be a structs column without nulls, + // and its second child is a count columns of integer type having no nulls. + auto const lists_cv = lists_column_view{values}; + auto const histogram_cv = lists_cv.get_sliced_child(stream); + CUDF_EXPECTS(!histogram_cv.has_nulls(), "Child of the input lists column must not have nulls."); + CUDF_EXPECTS(histogram_cv.type().id() == type_id::STRUCT && histogram_cv.num_children() == 2, + "The input column has invalid histograms structure."); CUDF_EXPECTS( - values.type().id() == type_id::STRUCT && values.num_children() == 2, - "The input of merge_histogram aggregation must be a struct column having two children."); - CUDF_EXPECTS(cudf::is_integral(values.child(1).type()) && !values.child(1).has_nulls(), - "The second child of the input column must be integral type and has no nulls."); + cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(), + "The input column has invalid histograms structure."); if (num_groups == 0) { return empty_like(values); } + // Firstly concatenate the histograms corresponding to the same key values. + // That is equivalent to creating a new lists column (view) from the input lists column + // with new offsets as below. + auto new_offsets = rmm::device_uvector(num_groups + 1, stream); + thrust::gather(rmm::exec_policy(stream), + group_offsets.begin(), + group_offsets.end(), + lists_cv.offsets_begin(), + new_offsets.begin()); + + auto key_labels = rmm::device_uvector(histogram_cv.size(), stream); + cudf::detail::label_segments( + new_offsets.begin(), new_offsets.end(), key_labels.begin(), key_labels.end(), stream); + // The input values column is already in histogram format (i.e., column of Struct). - auto const structs_cv = structs_column_view{values}; + auto const structs_cv = structs_column_view{histogram_cv}; auto const input_values = structs_cv.get_sliced_child(0, stream); auto const input_counts = structs_cv.get_sliced_child(1, stream); - return build_histogram(input_values, group_labels, input_counts, num_groups, stream, mr); + return build_histogram(input_values, key_labels, input_counts, num_groups, stream, mr); } } // namespace cudf::groupby::detail diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index c1d42987906..52ffb9fd9da 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -485,13 +485,13 @@ std::unique_ptr group_merge_m2(column_view const& values, * @endcode * * @param values Grouped values to get valid count of - * @param group_labels ID of group that the corresponding value belongs to + * @param group_offsets Offsets of groups' starting points within @p values. * @param num_groups Number of groups ( unique values in @p group_labels ) * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory */ std::unique_ptr group_merge_histogram(column_view const& values, - cudf::device_span group_labels, + cudf::device_span group_offsets, size_type num_groups, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 046068b3f0d..67aea83eee5 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -267,9 +267,8 @@ std::unique_ptr merge_histogram(column_view const& input, rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); - CUDF_EXPECTS( - input.type().id() == type_id::STRUCT && input.num_children() == 2, - "The input of merge_histogram aggregation must be a struct column having two children."); + CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2, + "The input must be a structs column having two children."); CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), "The second child of the input column must be integral type and has no nulls."); From 0a8a03dd313846880d09aee822f98d42563d8d99 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 13:40:03 -0700 Subject: [PATCH 077/100] Implement unit tests for merge histogram --- cpp/tests/groupby/histogram_tests.cpp | 237 +++++++++++++++++--------- 1 file changed, 152 insertions(+), 85 deletions(-) diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 3ea0c07b68a..fb3faebb951 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -39,7 +39,7 @@ auto groupby_histogram(cudf::column_view const& keys, "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM."); std::vector requests; - requests.emplace_back(cudf::groupby::aggregation_request()); + requests.emplace_back(); requests[0].values = values; if (agg_kind == cudf::aggregation::HISTOGRAM) { requests[0].aggregations.push_back( @@ -107,8 +107,9 @@ TYPED_TEST(GroupbyHistogramTest, SimpleInputNoNull) // key = 0: values = [2, 2, -3, -2, 2] // key = 1: values = [2, 0, 5, 2, 1] // key = 2: values = [-3, 1, 1, 2, 2] - auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; - auto const values = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; + auto const values = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; + auto const expected_keys = int32s_col{0, 1, 2}; auto const expected_histogram = [] { auto structs = [] { @@ -119,6 +120,7 @@ TYPED_TEST(GroupbyHistogramTest, SimpleInputNoNull) return cudf::make_lists_column( 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); }(); + auto const [res_keys, res_histogram] = groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); @@ -132,7 +134,6 @@ TYPED_TEST(GroupbyHistogramTest, SlicedInputNoNull) auto const keys_original = int32s_col{2, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2}; auto const values_original = col_data{1, 2, 0, 2, 1, -3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2}; - // key = 0: values = [2, 2, -3, -2, 2] // key = 1: values = [2, 0, 5, 2, 1] // key = 2: values = [-3, 1, 1, 2, 2] @@ -149,6 +150,7 @@ TYPED_TEST(GroupbyHistogramTest, SlicedInputNoNull) return cudf::make_lists_column( 3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{}); }(); + auto const [res_keys, res_histogram] = groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); @@ -168,6 +170,7 @@ TYPED_TEST(GroupbyHistogramTest, InputWithNulls) auto const values = col_data{{null, -3, 2, 1, 2, null, 0, 5, 2, null, -3, -2, 2, null, 1, 2, null, 1, 2, 2}, nulls_at({0, 5, 9, 13, 16})}; + auto const expected_keys = int32s_col{0, 1, 2}; auto const expected_histogram = [] { auto structs = [] { @@ -178,6 +181,7 @@ TYPED_TEST(GroupbyHistogramTest, InputWithNulls) return cudf::make_lists_column( 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); }(); + auto const [res_keys, res_histogram] = groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); @@ -213,110 +217,173 @@ TYPED_TEST(GroupbyHistogramTest, SlicedInputWithNulls) return cudf::make_lists_column( 3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{}); }(); + auto const [res_keys, res_histogram] = groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); } -#if 0 -TYPED_TEST(GroupbyMergeHistogramTest, MergeHistogram) +TYPED_TEST(GroupbyMergeHistogramTest, EmptyInput) { - using col_data = cudf::test::fixed_width_column_wrapper; - using int64s_col = cudf::test::fixed_width_column_wrapper; - using structs_col = cudf::test::structs_column_wrapper; - - auto const agg = cudf::make_merge_histogram_aggregation(); - - // Empty input. - { - auto const input = [] { - auto child1 = col_data{}; - auto child2 = int64s_col{}; - return structs_col{{child1, child2}}; - }(); - auto const expected = [] { - auto child1 = col_data{}; - auto child2 = int64s_col{}; - return structs_col{{child1, child2}}; - }(); - auto const result = histogram_reduction(input, agg); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } + using col_data = cudf::test::fixed_width_column_wrapper; - // Test without nulls. - { - auto const input = [] { - auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; - auto child2 = int64s_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; - return structs_col{{child1, child2}}; + auto const keys = int32s_col{}; + auto const values = col_data{}; + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + + // The structure of the output is already verified in the function `groupby_histogram`. + ASSERT_EQ(res_histogram->size(), 0); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SimpleInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]] + // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys = int32s_col{0, 1, 0, 1, 0}; + auto const values = [] { + auto structs = [] { + auto values = col_data{-3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1}; + auto counts = int64s_col{1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2}; + return structs_col{{values, counts}}; }(); + return cudf::make_lists_column( + 5, int32s_col{0, 3, 6, 8, 11, 14}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); - auto const expected = [] { - auto child1 = col_data{-3, -2, 0, 1, 2, 5}; - auto child2 = int64s_col{5, 5, 4, 5, 8, 1}; - return structs_col{{child1, child2}}; + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2}; + auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; }(); - auto const result = histogram_reduction(input, agg); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } + return cudf::make_lists_column( + 2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); - // Test without nulls, sliced input. - { - auto const input_original = [] { - auto child1 = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1}; - auto child2 = int64s_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4}; - return structs_col{{child1, child2}}; + auto const [res_keys, res_histogram] = + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputNoNull) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + + // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]] + // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys_original = int32s_col{0, 1, 0, 1, 0, 1, 0}; + auto const values_original = [] { + auto structs = [] { + auto values = col_data{0, 2, -3, 1, -3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1}; + auto counts = int64s_col{1, 2, 3, 1, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2}; + return structs_col{{values, counts}}; }(); - auto const input = cudf::slice(input_original, {0, 7})[0]; + return cudf::make_lists_column(7, + int32s_col{0, 2, 4, 7, 10, 12, 15, 18}.release(), + structs.release(), + 0, + rmm::device_buffer{}); + }(); + auto const keys = cudf::slice(keys_original, {2, 7})[0]; + auto const values = cudf::slice(*values_original, {2, 7})[0]; - auto const expected = [] { - auto child1 = col_data{-3, 0, 1, 2, 5}; - auto child2 = int64s_col{2, 4, 1, 5, 1}; - return structs_col{{child1, child2}}; + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2}; + auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; }(); - auto const result = histogram_reduction(input, agg); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } + return cudf::make_lists_column( + 2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); + + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} - // Test with nulls. +TYPED_TEST(GroupbyMergeHistogramTest, InputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; using namespace cudf::test::iterators; auto constexpr null{0}; - { - auto const input = [] { - auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, - nulls_at({2, 5, 8, 11, 15})}; - auto child2 = int64s_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; - return structs_col{{child1, child2}}; + + // key = 0: histograms = [[, <2, 3>], [, <1, 1>], [<0, 1>, <1, 2>]] + // key = 1: histograms = [[, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys = int32s_col{0, 1, 1, 0, 0}; + auto const values = [] { + auto structs = [] { + auto values = col_data{{null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1}, nulls_at({0, 2, 8})}; + auto counts = int64s_col{1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2}; + return structs_col{{values, counts}}; }(); + return cudf::make_lists_column( + 5, int32s_col{0, 2, 5, 8, 10, 12}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); - auto const expected = [] { - auto child1 = col_data{{null, -3, -2, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64s_col{67, 5, 5, 4, 5, 8, 1}; - return structs_col{{child1, child2}}; + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})}; + auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; }(); - auto const result = histogram_reduction(input, agg); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } + return cudf::make_lists_column( + 2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); - // Test with nulls, sliced input. - { - auto const input_original = [] { - auto child1 = col_data{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null}, - nulls_at({2, 5, 8, 11, 15})}; - auto child2 = int64s_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19}; - return structs_col{{child1, child2}}; + auto const [res_keys, res_histogram] = + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} + +TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputWithNulls) +{ + using col_data = cudf::test::fixed_width_column_wrapper; + using namespace cudf::test::iterators; + auto constexpr null{0}; + + // key = 0: histograms = [[, <2, 3>], [, <1, 1>], [<0, 1>, <1, 2>]] + // key = 1: histograms = [[, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]] + auto const keys_original = int32s_col{0, 1, 0, 1, 1, 0, 0}; + auto const values_original = [] { + auto structs = [] { + auto values = col_data{{null, 2, null, 1, null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1}, + nulls_at({0, 2, 4, 6, 12})}; + auto counts = int64s_col{1, 3, 2, 1, 1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2}; + return structs_col{{values, counts}}; }(); - auto const input = cudf::slice(input_original, {0, 9})[0]; + return cudf::make_lists_column(7, + int32s_col{0, 2, 4, 6, 9, 12, 14, 16}.release(), + structs.release(), + 0, + rmm::device_buffer{}); + }(); + auto const keys = cudf::slice(keys_original, {2, 7})[0]; + auto const values = cudf::slice(*values_original, {2, 7})[0]; - auto const expected = [] { - auto child1 = col_data{{null, -3, 0, 1, 2, 5}, null_at(0)}; - auto child2 = int64s_col{33, 2, 4, 1, 3, 1}; - return structs_col{{child1, child2}}; + auto const expected_keys = int32s_col{0, 1}; + auto const expected_histogram = [] { + auto structs = [] { + auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})}; + auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4}; + return structs_col{{values, counts}}; }(); - auto const result = histogram_reduction(input, agg); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); - } -} + return cudf::make_lists_column( + 2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); -#endif + auto const [res_keys, res_histogram] = + groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram); +} From 201d432a331527ff04458700a19844a2bf5ecc85 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 13:54:45 -0700 Subject: [PATCH 078/100] Fix empty output for merge histogram --- cpp/src/groupby/groupby.cu | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index ec200b52bbd..3ec40266b16 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -110,16 +110,16 @@ struct empty_column_constructor { 0, make_empty_column(type_to_id()), empty_like(values), 0, {}); } - if constexpr (k == aggregation::Kind::HISTOGRAM || k == aggregation::Kind::MERGE_HISTOGRAM) { - std::vector> struct_children; - struct_children.emplace_back(empty_like(values)); - struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); - auto structs = std::make_unique(data_type{type_id::STRUCT}, - 0, - rmm::device_buffer{}, - rmm::device_buffer{}, - 0, - std::move(struct_children)); + if constexpr (k == aggregation::Kind::HISTOGRAM) { + std::vector> struct_children; + struct_children.emplace_back(empty_like(values)); + struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); + auto structs = std::make_unique(data_type{type_id::STRUCT}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(struct_children)); return make_lists_column( 0, make_empty_column(type_to_id()), std::move(structs), 0, {}); } From edf68160231d023f250c31cbdd8fdd615a57bb66 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 13:54:53 -0700 Subject: [PATCH 079/100] Fix empty input test --- cpp/tests/groupby/histogram_tests.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index fb3faebb951..3345c483d3a 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -229,9 +229,17 @@ TYPED_TEST(GroupbyMergeHistogramTest, EmptyInput) using col_data = cudf::test::fixed_width_column_wrapper; auto const keys = int32s_col{}; - auto const values = col_data{}; + auto const values = [] { + auto structs = [] { + auto values = col_data{}; + auto counts = int64s_col{}; + return structs_col{{values, counts}}; + }(); + return cudf::make_lists_column( + 0, int32s_col{}.release(), structs.release(), 0, rmm::device_buffer{}); + }(); auto const [res_keys, res_histogram] = - groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM); + groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM); // The structure of the output is already verified in the function `groupby_histogram`. ASSERT_EQ(res_histogram->size(), 0); From 8ac649ecf7dfd23c9efdc5d674a3603bff0fe219 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 13:56:07 -0700 Subject: [PATCH 080/100] Remove comment --- cpp/src/groupby/sort/group_histogram.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 88724c54112..29a2debe741 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -32,7 +32,6 @@ namespace cudf::groupby::detail { // Fixed type for counting frequencies in historam. -// This is to avoid using `target_type_t` which requires type_dispatcher. constexpr auto histogram_count_dtype = data_type{type_to_id()}; namespace { From 04965fa2a972fed16ce7a9e57bf496c52d0c71c3 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:09:23 -0700 Subject: [PATCH 081/100] Cleanup --- cpp/src/groupby/sort/group_histogram.cu | 40 +++++++++---------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 29a2debe741..927cd43d040 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -35,20 +35,6 @@ namespace cudf::groupby::detail { constexpr auto histogram_count_dtype = data_type{type_to_id()}; namespace { -auto make_empty_histogram(column_view const& values) -{ - std::vector> struct_children; - struct_children.emplace_back(empty_like(values)); - struct_children.emplace_back(make_numeric_column(histogram_count_dtype, 0)); - auto structs = std::make_unique(data_type{type_id::STRUCT}, - 0, - rmm::device_buffer{}, - rmm::device_buffer{}, - 0, - std::move(struct_children)); - return make_lists_column( - 0, make_empty_column(type_to_id()), std::move(structs), 0, {}); -} std::unique_ptr build_histogram(column_view const& values, cudf::device_span group_labels, @@ -59,9 +45,7 @@ std::unique_ptr build_histogram(column_view const& values, { CUDF_EXPECTS(num_groups >= 0, "Number of groups cannot be negative."); CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), - "Size of values column should be same as that of group labels."); - - if (num_groups == 0) { return make_empty_histogram(values); } + "Size of values column should be the same as that of group labels."); // Attach group labels to the input values. auto const labels_cv = column_view{data_type{type_to_id()}, @@ -75,7 +59,7 @@ std::unique_ptr build_histogram(column_view const& values, auto [distinct_indices, distinct_counts] = cudf::reduction::detail::table_histogram( labeled_values, partial_counts, histogram_count_dtype, stream, mr); - // Gather the distinct rows for output histogram. + // Gather the distinct rows for the output histogram. auto out_table = cudf::detail::gather(labeled_values, distinct_indices, out_of_bounds_policy::DONT_CHECK, @@ -83,8 +67,8 @@ std::unique_ptr build_histogram(column_view const& values, stream, mr); - // Build offsets for the output lists column. - // Each list will be a histogram corresponding to each value group. + // Build offsets for the output lists column containing output histograms. + // Each list will be a histogram corresponding to one value group. auto out_offsets = cudf::lists::detail::reconstruct_offsets( out_table->get_column(0).view(), num_groups, stream, mr); @@ -106,6 +90,9 @@ std::unique_ptr group_histogram(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Empty group should be handled before reaching here. + CUDF_EXPECTS(num_groups > 0, "Group should not be empty."); + return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr); } @@ -115,13 +102,16 @@ std::unique_ptr group_merge_histogram(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Empty group should be handled before reaching here. + CUDF_EXPECTS(num_groups > 0, "Group should not be empty."); + // The input must be a lists column without nulls. CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls."); CUDF_EXPECTS(values.type().id() == type_id::LIST, "The input of MERGE_HISTOGRAM aggregation must be a lists column."); // Child of the input lists column must be a structs column without nulls, - // and its second child is a count columns of integer type having no nulls. + // and its second child is a columns of integer type having no nulls. auto const lists_cv = lists_column_view{values}; auto const histogram_cv = lists_cv.get_sliced_child(stream); CUDF_EXPECTS(!histogram_cv.has_nulls(), "Child of the input lists column must not have nulls."); @@ -131,11 +121,9 @@ std::unique_ptr group_merge_histogram(column_view const& values, cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(), "The input column has invalid histograms structure."); - if (num_groups == 0) { return empty_like(values); } - - // Firstly concatenate the histograms corresponding to the same key values. + // Concatenate the histograms corresponding to the same key values. // That is equivalent to creating a new lists column (view) from the input lists column - // with new offsets as below. + // with new offsets gathered as below. auto new_offsets = rmm::device_uvector(num_groups + 1, stream); thrust::gather(rmm::exec_policy(stream), group_offsets.begin(), @@ -143,11 +131,11 @@ std::unique_ptr group_merge_histogram(column_view const& values, lists_cv.offsets_begin(), new_offsets.begin()); + // Generate labels for the new lists. auto key_labels = rmm::device_uvector(histogram_cv.size(), stream); cudf::detail::label_segments( new_offsets.begin(), new_offsets.end(), key_labels.begin(), key_labels.end(), stream); - // The input values column is already in histogram format (i.e., column of Struct). auto const structs_cv = structs_column_view{histogram_cv}; auto const input_values = structs_cv.get_sliced_child(0, stream); auto const input_counts = structs_cv.get_sliced_child(1, stream); From 63ef1fa5e41a816d8e523167d6d1e1dbdb2307bf Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:17:51 -0700 Subject: [PATCH 082/100] Fix docs --- cpp/src/groupby/sort/group_reductions.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 52ffb9fd9da..3aa79f226a3 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -234,8 +234,8 @@ std::unique_ptr group_count_all(cudf::device_span group * * @param values Grouped values to compute histogram * @param group_labels ID of group that the corresponding value belongs to - * @param num_groups Number of groups ( unique values in @p group_labels ) - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param num_groups Number of groups + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory */ std::unique_ptr group_histogram(column_view const& values, @@ -472,22 +472,22 @@ std::unique_ptr group_merge_m2(column_view const& values, /** * @brief Internal API to merge multiple output of HISTOGRAM aggregation. * - * The input values column should be given as a structs column in the form of - * `STRUCT`. + * The input values column should be given as a lists column in the form of + * `LIST>`. * After merging, the order of distinct elements in each output list is not specified. * * @code{.pseudo} - * values = [<1, 2>, <2, 1>, <2, 2>, <3, 2>, <2, 1>, <1, 1>, <2, 1>] - * group_labels = [0, 0, 0, 1, 1, 1, 1] - * num_groups = 2 + * values = [ [<1, 2>, <2, 1>], [<2, 2>], [<3, 2>, <2, 1>], [<1, 1>, <2, 1>] ] + * group_offsets = [ 0, 2, 4] + * num_groups = 2 * - * output = [[<1, 2>, <2, 3>], [<1, 1>, <2, 2>, <3, 3>]]] + * output = [[<1, 2>, <2, 3>], [<1, 1>, <2, 2>, <3, 2>]]] * @endcode * * @param values Grouped values to get valid count of - * @param group_offsets Offsets of groups' starting points within @p values. - * @param num_groups Number of groups ( unique values in @p group_labels ) - * @param stream CUDA stream used for device memory operations and kernel launches. + * @param group_offsets Offsets of groups' starting points within @p values + * @param num_groups Number of groups + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory */ std::unique_ptr group_merge_histogram(column_view const& values, From d31de2006f8119d639dd84b1596deace16cae130 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:31:03 -0700 Subject: [PATCH 083/100] Rewrite docs --- cpp/src/reductions/histogram_helpers.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 3d3d548cc00..521b633cfa3 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -30,11 +30,11 @@ namespace cudf::reduction::detail { /** * @brief Compute the histogram for the input table. * - * This is equivalent to do a distinct count for each unique rows in the input. + * This is equivalent to do a distinct count for each unique row in the input. * * @param input The input table to compute histogram - * @param partial_counts An optional column containing counts for each row - * @param output_dtype The output type to store the count value + * @param partial_counts An optional column containing count for each row + * @param output_dtype The type to store count value * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate memory of the returned objects * @return A pair of array contains the (stable-order) indices of the distinct rows in the input From 34a426854e5b9e5a9f959e4eea67a7c8bf77af9a Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:32:39 -0700 Subject: [PATCH 084/100] Rewrite histogram.cu --- cpp/src/reductions/histogram.cu | 81 +++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 67aea83eee5..ea2f9afe620 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -24,7 +24,9 @@ #include #include -#include +#include +#include +#include #include @@ -37,19 +39,19 @@ namespace { /** * @brief The functor to accumulate the frequency of each distinct rows in the input table. */ -template -struct reduce_fn : cudf::detail::reduce_by_row_fn_base { - OutputType const* d_partial_output; +template +struct reduce_fn : cudf::detail::reduce_by_row_fn_base { + CountType const* d_partial_output; reduce_fn(MapView const& d_map, KeyHasher const& d_hasher, KeyEqual const& d_equal, - OutputType* const d_output, - OutputType const* const d_partial_output) - : cudf::detail::reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, + CountType* const d_output, + CountType const* const d_partial_output) + : cudf::detail::reduce_by_row_fn_base{d_map, + d_hasher, + d_equal, + d_output}, d_partial_output{d_partial_output} { } @@ -57,9 +59,9 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base(*this->get_output_ptr(idx)); + cuda::atomic_ref(*this->get_output_ptr(idx)); count.fetch_add(increment, cuda::std::memory_order_relaxed); } }; @@ -67,11 +69,11 @@ struct reduce_fn : cudf::detail::reduce_by_row_fn_base +template struct reduce_func_builder { - OutputType const* const d_partial_output; + CountType const* const d_partial_output; - reduce_func_builder(OutputType const* const d_partial_output) : d_partial_output{d_partial_output} + reduce_func_builder(CountType const* const d_partial_output) : d_partial_output{d_partial_output} { } @@ -79,17 +81,15 @@ struct reduce_func_builder { auto build(MapView const& d_map, KeyHasher const& d_hasher, KeyEqual const& d_equal, - OutputType* const d_output) + CountType* const d_output) { - return reduce_fn{ + return reduce_fn{ d_map, d_hasher, d_equal, d_output, d_partial_output}; } }; /** - * @brief Specialized functor to check for non-zero. - * - * The input must be given as Pair. Only value of T2 is checked for non-zero. + * @brief Specialized functor to check for non-zero of the second component of the input. */ struct is_none_zero { template @@ -100,27 +100,28 @@ struct is_none_zero { }; /** - * @brief Dispatcher functor to compute histogram in the given OutputType. + * @brief Dispatcher functor to compute histogram with frequencies (aka element counts) stored in + * a buffer of type given by CountType. * * The indices of distinct rows and their corresponding frequencies are written into two separate - * output buffer. + * output buffers. */ struct histogram_dispatcher { - template + template static bool constexpr is_supported() { // Currently only int64_t is requested by Spark-Rapids. // More data type (integer only) can be supported by enabling below. - return std::is_same_v; + return std::is_same_v; } - template - std::enable_if_t(), void> operator()(Args&&...) + template + std::enable_if_t(), void> operator()(Args&&...) { - CUDF_FAIL("Unsupported output type in histogram aggregation."); + CUDF_FAIL("Unsupported count type in histogram aggregation."); } - template ())> + template ())> void operator()( cudf::detail::hash_map_type const& map, std::shared_ptr const preprocessed_input, @@ -132,6 +133,7 @@ struct histogram_dispatcher { std::optional const& partial_counts, rmm::cuda_stream_view stream) const { + // Note that we consider null and NaNs as always equal. auto const reduction_results = cudf::detail::hash_reduce_by_row( map, preprocessed_input, @@ -140,24 +142,35 @@ struct histogram_dispatcher { has_nested_columns, null_equality::EQUAL, nan_equality::ALL_EQUAL, - reduce_func_builder{partial_counts ? partial_counts.value().begin() - : nullptr}, - OutputType{0}, + reduce_func_builder{partial_counts ? partial_counts.value().begin() + : nullptr}, + CountType{0}, stream, rmm::mr::get_current_device_resource()); auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); auto const output_it = thrust::make_zip_iterator( - thrust::make_tuple(output_indices, output_counts.begin())); + thrust::make_tuple(output_indices, output_counts.begin())); // Reduction results above are either group sizes of equal rows, or `0`. - // Thus, we need to extract the non-zero group sizes. + // The final output is non-zero group sizes only. thrust::copy_if( rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, is_none_zero{}); } }; +/** + * @brief Building a histogram by gathering distinct rows from the input table and their + * corresponding distinct counts. + * + * @param input The input table + * @param distinct_indices Indices of the distinct rows + * @param distinct_counts Distinct counts corresponding to the distinct rows + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned object's device memory + * @return A list_scalar storing the output histogram + */ auto gather_histogram(table_view const& input, device_span distinct_indices, std::unique_ptr&& distinct_counts, @@ -194,7 +207,7 @@ std::pair, std::unique_ptr> table_histogr rmm::mr::device_memory_resource* mr) { CUDF_EXPECTS(cudf::is_integral(output_dtype), - "The output type of histogram aggregation must be an integral type."); + "The output count type of histogram aggregation must be an integral type."); auto map = cudf::detail::hash_map_type{ compute_hash_table_size(input.num_rows()), From 502a3daccad8cda3d7b37f972e765a4e9e008ec1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:33:24 -0700 Subject: [PATCH 085/100] Fix typo --- cpp/src/reductions/histogram.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index ea2f9afe620..b6078955d06 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -151,7 +151,7 @@ struct histogram_dispatcher { auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); auto const output_it = thrust::make_zip_iterator( - thrust::make_tuple(output_indices, output_counts.begin())); + thrust::make_tuple(output_indices, output_counts.begin())); // Reduction results above are either group sizes of equal rows, or `0`. // The final output is non-zero group sizes only. From 61377e0d586a5a4a93c20185e6e4de52e1686b5f Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:35:34 -0700 Subject: [PATCH 086/100] Fix header --- cpp/tests/groupby/histogram_tests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 3345c483d3a..c5833f40cf2 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -23,6 +21,7 @@ #include #include +#include #include #include From dd72159f570bd3eedca1da33080e96a7aa810f5c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 14:37:37 -0700 Subject: [PATCH 087/100] Revert changes --- cpp/tests/reductions/reduction_tests.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index ed85da9e50b..7644ac48892 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1091,10 +1091,10 @@ TEST_F(ReductionEmptyTest, empty_column) // test if null count is equal or greater than size of input // expect result.is_valid() is false int col_size = 5; - std::vector data_col(col_size); + std::vector col_data(col_size); std::vector valids(col_size, 0); - cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(data_col, valids); + cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(col_data, valids); CUDF_EXPECT_NO_THROW(statement(col_nulls)); auto any_agg = cudf::make_any_aggregation(); From 424196b72ccb6a1358791427b6adb80ccc7749d8 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 21:55:04 -0700 Subject: [PATCH 088/100] Add empty input handling --- cpp/src/groupby/groupby.cu | 19 ++++++++----------- cpp/src/reductions/histogram.cu | 18 ++++++++++++++++++ cpp/src/reductions/histogram_helpers.hpp | 11 +++++++++++ cpp/src/reductions/reductions.cpp | 11 +++++++++++ 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 3ec40266b16..d237c5db41b 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include #include @@ -111,18 +113,13 @@ struct empty_column_constructor { } if constexpr (k == aggregation::Kind::HISTOGRAM) { - std::vector> struct_children; - struct_children.emplace_back(empty_like(values)); - struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); - auto structs = std::make_unique(data_type{type_id::STRUCT}, - 0, - rmm::device_buffer{}, - rmm::device_buffer{}, - 0, - std::move(struct_children)); - return make_lists_column( - 0, make_empty_column(type_to_id()), std::move(structs), 0, {}); + return make_lists_column(0, + make_empty_column(type_to_id()), + cudf::reduction::detail::make_empty_histogram_like(values), + 0, + {}); } + if constexpr (k == aggregation::Kind::MERGE_HISTOGRAM) { return empty_like(values); } if constexpr (k == aggregation::Kind::RANK) { auto const& rank_agg = dynamic_cast(agg); diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index b6078955d06..651be4e3e98 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -199,6 +199,19 @@ auto gather_histogram(table_view const& input, } // namespace +std::unique_ptr make_empty_histogram_like(column_view const& values) +{ + std::vector> struct_children; + struct_children.emplace_back(empty_like(values)); + struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0)); + return std::make_unique(data_type{type_id::STRUCT}, + 0, + rmm::device_buffer{}, + rmm::device_buffer{}, + 0, + std::move(struct_children)); +} + std::pair, std::unique_ptr> table_histogram( table_view const& input, std::optional const& partial_counts, @@ -269,6 +282,9 @@ std::unique_ptr histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Empty group should be handled before reaching here. + CUDF_EXPECTS(input.size() > 0, "Input should not be empty."); + auto const input_tv = table_view{{input}}; auto [distinct_indices, distinct_counts] = table_histogram(input_tv, std::nullopt, output_dtype, stream, mr); @@ -279,6 +295,8 @@ std::unique_ptr merge_histogram(column_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + // Empty group should be handled before reaching here. + CUDF_EXPECTS(input.size() > 0, "Input should not be empty."); CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2, "The input must be a structs column having two children."); diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 521b633cfa3..62051b9240e 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -23,6 +23,7 @@ #include #include +#include #include namespace cudf::reduction::detail { @@ -47,4 +48,14 @@ std::pair, std::unique_ptr> table_histogr rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Create an empty histogram column. + * + * A histogram column is a structs column `STRUCT` where T is type of the input + * values. + * + * @returns An empty histogram column + */ +std::unique_ptr make_empty_histogram_like(column_view const& values); + } // namespace cudf::reduction::detail diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 8d19413190b..9e476742baa 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include + #include #include #include @@ -167,6 +169,15 @@ std::unique_ptr reduce(column_view const& col, return tdigest::detail::make_empty_tdigest_scalar(stream, mr); } + if (agg.kind == aggregation::HISTOGRAM) { + return std::make_unique( + std::move(*reduction::detail::make_empty_histogram_like(col)), true, stream, mr); + } + if (agg.kind == aggregation::MERGE_HISTOGRAM) { + return std::make_unique( + std::move(*reduction::detail::make_empty_histogram_like(col.child(0))), true, stream, mr); + } + if (output_dtype.id() == type_id::LIST) { if (col.type() == output_dtype) { return make_empty_scalar_like(col, stream, mr); } // Under some circumstance, the output type will become the List of input type, From 26238dd8a39846ad2a578d700b41613c2ae1167b Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 20 Sep 2023 22:27:43 -0700 Subject: [PATCH 089/100] Rename function and change return type --- cpp/src/groupby/sort/group_histogram.cu | 6 +++--- cpp/src/reductions/histogram.cu | 16 ++++++++-------- cpp/src/reductions/histogram_helpers.hpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 927cd43d040..a3f0c5b1e47 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -56,12 +56,12 @@ std::unique_ptr build_histogram(column_view const& values, auto const labeled_values = table_view{{labels_cv, values}}; // Build histogram for the labeled values. - auto [distinct_indices, distinct_counts] = cudf::reduction::detail::table_histogram( + auto [distinct_indices, distinct_counts] = cudf::reduction::detail::histogram_table( labeled_values, partial_counts, histogram_count_dtype, stream, mr); // Gather the distinct rows for the output histogram. auto out_table = cudf::detail::gather(labeled_values, - distinct_indices, + *distinct_indices, out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, @@ -76,7 +76,7 @@ std::unique_ptr build_histogram(column_view const& values, struct_children.emplace_back(std::move(out_table->release().back())); struct_children.emplace_back(std::move(distinct_counts)); auto out_structs = make_structs_column( - static_cast(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr); + static_cast(distinct_indices->size()), std::move(struct_children), 0, {}, stream, mr); return make_lists_column( num_groups, std::move(out_offsets), std::move(out_structs), 0, {}, stream, mr); diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 651be4e3e98..f4ae7fdb407 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -212,7 +212,7 @@ std::unique_ptr make_empty_histogram_like(column_view const& values) std::move(struct_children)); } -std::pair, std::unique_ptr> table_histogram( +std::pair>, std::unique_ptr> histogram_table( table_view const& input, std::optional const& partial_counts, data_type const output_dtype, @@ -255,8 +255,8 @@ std::pair, std::unique_ptr> table_histogr } // Gather the indices of distinct rows. - auto distinct_indices = rmm::device_uvector( - static_cast(map.get_size()), stream, rmm::mr::get_current_device_resource()); + auto distinct_indices = std::make_unique>( + static_cast(map.get_size()), stream, mr); // Store the number of occurrences of each distinct row. auto distinct_counts = make_numeric_column( @@ -269,7 +269,7 @@ std::pair, std::unique_ptr> table_histogr input.num_rows(), has_nulls, has_nested_columns, - distinct_indices.begin(), + distinct_indices->begin(), distinct_counts->mutable_view(), partial_counts, stream); @@ -287,8 +287,8 @@ std::unique_ptr histogram(column_view const& input, auto const input_tv = table_view{{input}}; auto [distinct_indices, distinct_counts] = - table_histogram(input_tv, std::nullopt, output_dtype, stream, mr); - return gather_histogram(input_tv, distinct_indices, std::move(distinct_counts), stream, mr); + histogram_table(input_tv, std::nullopt, output_dtype, stream, mr); + return gather_histogram(input_tv, *distinct_indices, std::move(distinct_counts), stream, mr); } std::unique_ptr merge_histogram(column_view const& input, @@ -309,8 +309,8 @@ std::unique_ptr merge_histogram(column_view const& input, auto const values_tv = table_view{{input_values}}; auto [distinct_indices, distinct_counts] = - table_histogram(values_tv, input_counts, data_type{type_id::INT64}, stream, mr); - return gather_histogram(values_tv, distinct_indices, std::move(distinct_counts), stream, mr); + histogram_table(values_tv, input_counts, data_type{type_id::INT64}, stream, mr); + return gather_histogram(values_tv, *distinct_indices, std::move(distinct_counts), stream, mr); } } // namespace cudf::reduction::detail diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 62051b9240e..0f830e71bbc 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -41,7 +41,7 @@ namespace cudf::reduction::detail { * @return A pair of array contains the (stable-order) indices of the distinct rows in the input * table, and their corresponding distinct counts */ -std::pair, std::unique_ptr> table_histogram( +std::pair>, std::unique_ptr> histogram_table( table_view const& input, std::optional const& partial_counts, data_type const output_dtype, From 76f77a00a7cd0b28f04f7958dfac00fc577e7f1d Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Sep 2023 09:40:23 -0700 Subject: [PATCH 090/100] Format --- cpp/src/groupby/sort/group_histogram.cu | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index a3f0c5b1e47..5c02c57af10 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -75,8 +75,12 @@ std::unique_ptr build_histogram(column_view const& values, std::vector> struct_children; struct_children.emplace_back(std::move(out_table->release().back())); struct_children.emplace_back(std::move(distinct_counts)); - auto out_structs = make_structs_column( - static_cast(distinct_indices->size()), std::move(struct_children), 0, {}, stream, mr); + auto out_structs = make_structs_column(static_cast(distinct_indices->size()), + std::move(struct_children), + 0, + {}, + stream, + mr); return make_lists_column( num_groups, std::move(out_offsets), std::move(out_structs), 0, {}, stream, mr); From ad09d30c41c684e526a1e933ccb58fc8b4b89467 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Sep 2023 10:12:42 -0700 Subject: [PATCH 091/100] Revert "Add binding for `HISTOGRAM` and `MERGE_HISTOGRAM` aggregations" This reverts commit ee229a00125c9a4c2edff32c73ca8d952c75b1e7. --- .../main/java/ai/rapids/cudf/Aggregation.java | 24 +------------------ .../ai/rapids/cudf/GroupByAggregation.java | 8 ------- java/src/main/native/src/AggregationJni.cpp | 5 ---- 3 files changed, 1 insertion(+), 36 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 029017ae113..d10329ca0f2 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -68,9 +68,7 @@ enum Kind { DENSE_RANK(29), PERCENT_RANK(30), TDIGEST(31), // This can take a delta argument for accuracy level - MERGE_TDIGEST(32), // This can take a delta argument for accuracy level - HISTOGRAM(33), - MERGE_HISTOGRAM(34); + MERGE_TDIGEST(32); // This can take a delta argument for accuracy level final int nativeId; @@ -920,26 +918,6 @@ static TDigestAggregation mergeTDigest(int delta) { return new TDigestAggregation(Kind.MERGE_TDIGEST, delta); } - static final class HistogramAggregation extends NoParamAggregation { - private HistogramAggregation() { - super(Kind.HISTOGRAM); - } - } - - static final class MergeHistogramAggregation extends NoParamAggregation { - private MergeHistogramAggregation() { - super(Kind.MERGE_HISTOGRAM); - } - } - - static HistogramAggregation histogram() { - return new HistogramAggregation(); - } - - static MergeHistogramAggregation mergeHistogram() { - return new MergeHistogramAggregation(); - } - /** * Create one of the aggregations that only needs a kind, no other parameters. This does not * work for all types and for code safety reasons each kind is added separately. diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 25bb716bd5a..500d18f7eae 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -315,12 +315,4 @@ public static GroupByAggregation createTDigest(int delta) { public static GroupByAggregation mergeTDigest(int delta) { return new GroupByAggregation(Aggregation.mergeTDigest(delta)); } - - public static GroupByAggregation histogram() { - return new GroupByAggregation(Aggregation.histogram()); - } - - public static GroupByAggregation mergeHistogram() { - return new GroupByAggregation(Aggregation.mergeHistogram()); - } } diff --git a/java/src/main/native/src/AggregationJni.cpp b/java/src/main/native/src/AggregationJni.cpp index 8984c27530d..6ac73282615 100644 --- a/java/src/main/native/src/AggregationJni.cpp +++ b/java/src/main/native/src/AggregationJni.cpp @@ -90,11 +90,6 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Aggregation_createNoParamAgg(JNIEnv case 30: // ANSI SQL PERCENT_RANK return cudf::make_rank_aggregation(cudf::rank_method::MIN, {}, cudf::null_policy::INCLUDE, {}, cudf::rank_percentage::ONE_NORMALIZED); - case 33: // HISTOGRAM - return cudf::make_histogram_aggregation(); - case 34: // MERGE_HISTOGRAM - return cudf::make_merge_histogram_aggregation(); - default: throw std::logic_error("Unsupported No Parameter Aggregation Operation"); } }(); From e12df0f48749c89521aa892b956851ea14c5a6db Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Sep 2023 16:26:20 -0700 Subject: [PATCH 092/100] Rename function, remove histogram output dtype --- .../reduction/detail/reduction_functions.hpp | 2 - cpp/src/groupby/sort/group_histogram.cu | 7 +- cpp/src/reductions/histogram.cu | 128 ++++++------------ cpp/src/reductions/histogram_helpers.hpp | 16 +-- cpp/src/reductions/reductions.cpp | 2 +- 5 files changed, 50 insertions(+), 105 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index 9c85e754c2f..f013f91c5c3 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -140,13 +140,11 @@ std::unique_ptr all(column_view const& col, * @throw cudf::logic_error if `output_dtype` is not integer type * * @param input The column to compute histogram - * @param output_dtype Data type to store the element frequencies * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory * @return A list_scalar storing a structs column as the result histogram */ std::unique_ptr histogram(column_view const& input, - data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 5c02c57af10..6e4149b965e 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -31,9 +31,6 @@ namespace cudf::groupby::detail { -// Fixed type for counting frequencies in historam. -constexpr auto histogram_count_dtype = data_type{type_to_id()}; - namespace { std::unique_ptr build_histogram(column_view const& values, @@ -56,8 +53,8 @@ std::unique_ptr build_histogram(column_view const& values, auto const labeled_values = table_view{{labels_cv, values}}; // Build histogram for the labeled values. - auto [distinct_indices, distinct_counts] = cudf::reduction::detail::histogram_table( - labeled_values, partial_counts, histogram_count_dtype, stream, mr); + auto [distinct_indices, distinct_counts] = + cudf::reduction::detail::compute_row_frequencies(labeled_values, partial_counts, stream, mr); // Gather the distinct rows for the output histogram. auto out_table = cudf::detail::gather(labeled_values, diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index f4ae7fdb407..7d8b432a5e6 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -36,6 +36,9 @@ namespace cudf::reduction::detail { namespace { +// Always use 64-bit signed integer for storing count. +using histogram_count_type = int64_t; + /** * @brief The functor to accumulate the frequency of each distinct rows in the input table. */ @@ -99,67 +102,6 @@ struct is_none_zero { } }; -/** - * @brief Dispatcher functor to compute histogram with frequencies (aka element counts) stored in - * a buffer of type given by CountType. - * - * The indices of distinct rows and their corresponding frequencies are written into two separate - * output buffers. - */ -struct histogram_dispatcher { - template - static bool constexpr is_supported() - { - // Currently only int64_t is requested by Spark-Rapids. - // More data type (integer only) can be supported by enabling below. - return std::is_same_v; - } - - template - std::enable_if_t(), void> operator()(Args&&...) - { - CUDF_FAIL("Unsupported count type in histogram aggregation."); - } - - template ())> - void operator()( - cudf::detail::hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - size_type* output_indices, - mutable_column_view const& output_counts, - std::optional const& partial_counts, - rmm::cuda_stream_view stream) const - { - // Note that we consider null and NaNs as always equal. - auto const reduction_results = cudf::detail::hash_reduce_by_row( - map, - preprocessed_input, - num_rows, - has_nulls, - has_nested_columns, - null_equality::EQUAL, - nan_equality::ALL_EQUAL, - reduce_func_builder{partial_counts ? partial_counts.value().begin() - : nullptr}, - CountType{0}, - stream, - rmm::mr::get_current_device_resource()); - - auto const input_it = thrust::make_zip_iterator( - thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); - auto const output_it = thrust::make_zip_iterator( - thrust::make_tuple(output_indices, output_counts.begin())); - - // Reduction results above are either group sizes of equal rows, or `0`. - // The final output is non-zero group sizes only. - thrust::copy_if( - rmm::exec_policy(stream), input_it, input_it + num_rows, output_it, is_none_zero{}); - } -}; - /** * @brief Building a histogram by gathering distinct rows from the input table and their * corresponding distinct counts. @@ -212,16 +154,12 @@ std::unique_ptr make_empty_histogram_like(column_view const& values) std::move(struct_children)); } -std::pair>, std::unique_ptr> histogram_table( - table_view const& input, - std::optional const& partial_counts, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) +std::pair>, std::unique_ptr> +compute_row_frequencies(table_view const& input, + std::optional const& partial_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(cudf::is_integral(output_dtype), - "The output count type of histogram aggregation must be an integral type."); - auto map = cudf::detail::hash_map_type{ compute_hash_table_size(input.num_rows()), cuco::empty_key{-1}, @@ -259,26 +197,42 @@ std::pair>, std::unique_ptr(map.get_size()), stream, mr); // Store the number of occurrences of each distinct row. - auto distinct_counts = make_numeric_column( - output_dtype, static_cast(map.get_size()), mask_state::UNALLOCATED, stream, mr); - - type_dispatcher(output_dtype, - histogram_dispatcher{}, - map, - std::move(preprocessed_input), - input.num_rows(), - has_nulls, - has_nested_columns, - distinct_indices->begin(), - distinct_counts->mutable_view(), - partial_counts, - stream); + auto distinct_counts = make_numeric_column(data_type{type_to_id()}, + static_cast(map.get_size()), + mask_state::UNALLOCATED, + stream, + mr); + + // Compute frequencies (aka unique counts) for the input rows. + // Note that we consider null and NaNs as always equal. + auto const reduction_results = cudf::detail::hash_reduce_by_row( + map, + preprocessed_input, + input.num_rows(), + has_nulls, + has_nested_columns, + null_equality::EQUAL, + nan_equality::ALL_EQUAL, + reduce_func_builder{ + partial_counts ? partial_counts.value().begin() : nullptr}, + histogram_count_type{0}, + stream, + rmm::mr::get_current_device_resource()); + + auto const input_it = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); + auto const output_it = thrust::make_zip_iterator(thrust::make_tuple( + distinct_indices->begin(), distinct_counts->mutable_view().begin())); + + // Reduction results above are either group sizes of equal rows, or `0`. + // The final output is non-zero group sizes only. + thrust::copy_if( + rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_none_zero{}); return {std::move(distinct_indices), std::move(distinct_counts)}; } std::unique_ptr histogram(column_view const& input, - data_type const output_dtype, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -287,7 +241,7 @@ std::unique_ptr histogram(column_view const& input, auto const input_tv = table_view{{input}}; auto [distinct_indices, distinct_counts] = - histogram_table(input_tv, std::nullopt, output_dtype, stream, mr); + compute_row_frequencies(input_tv, std::nullopt, stream, mr); return gather_histogram(input_tv, *distinct_indices, std::move(distinct_counts), stream, mr); } @@ -309,7 +263,7 @@ std::unique_ptr merge_histogram(column_view const& input, auto const values_tv = table_view{{input_values}}; auto [distinct_indices, distinct_counts] = - histogram_table(values_tv, input_counts, data_type{type_id::INT64}, stream, mr); + compute_row_frequencies(values_tv, input_counts, stream, mr); return gather_histogram(values_tv, *distinct_indices, std::move(distinct_counts), stream, mr); } diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/src/reductions/histogram_helpers.hpp index 0f830e71bbc..fc5965e92f9 100644 --- a/cpp/src/reductions/histogram_helpers.hpp +++ b/cpp/src/reductions/histogram_helpers.hpp @@ -29,24 +29,20 @@ namespace cudf::reduction::detail { /** - * @brief Compute the histogram for the input table. - * - * This is equivalent to do a distinct count for each unique row in the input. + * @brief Compute the frequency for each unique row in the input table. * * @param input The input table to compute histogram * @param partial_counts An optional column containing count for each row - * @param output_dtype The type to store count value * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate memory of the returned objects * @return A pair of array contains the (stable-order) indices of the distinct rows in the input * table, and their corresponding distinct counts */ -std::pair>, std::unique_ptr> histogram_table( - table_view const& input, - std::optional const& partial_counts, - data_type const output_dtype, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); +std::pair>, std::unique_ptr> +compute_row_frequencies(table_view const& input, + std::optional const& partial_counts, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr); /** * @brief Create an empty histogram column. diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 9e476742baa..42bb83391bd 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -61,7 +61,7 @@ struct reduce_dispatch_functor { case aggregation::MAX: return max(col, output_dtype, init, stream, mr); case aggregation::ANY: return any(col, output_dtype, init, stream, mr); case aggregation::ALL: return all(col, output_dtype, init, stream, mr); - case aggregation::HISTOGRAM: return histogram(col, output_dtype, stream, mr); + case aggregation::HISTOGRAM: return histogram(col, stream, mr); case aggregation::MERGE_HISTOGRAM: return merge_histogram(col, stream, mr); case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr); case aggregation::MEAN: return mean(col, output_dtype, stream, mr); From b06ed2a60f391bfcc77de68d3e2c18ac53864951 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Sep 2023 16:30:50 -0700 Subject: [PATCH 093/100] Move header --- .../reductions => include/cudf/detail}/histogram_helpers.hpp | 0 cpp/src/groupby/groupby.cu | 3 +-- cpp/src/groupby/sort/group_histogram.cu | 2 +- cpp/src/reductions/reductions.cpp | 3 +-- 4 files changed, 3 insertions(+), 5 deletions(-) rename cpp/{src/reductions => include/cudf/detail}/histogram_helpers.hpp (100%) diff --git a/cpp/src/reductions/histogram_helpers.hpp b/cpp/include/cudf/detail/histogram_helpers.hpp similarity index 100% rename from cpp/src/reductions/histogram_helpers.hpp rename to cpp/include/cudf/detail/histogram_helpers.hpp diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index d237c5db41b..e1406881569 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include @@ -26,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 6e4149b965e..28b4a3154ca 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -15,11 +15,11 @@ */ #include -#include #include #include #include +#include #include #include #include diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 42bb83391bd..31f9948a02f 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -14,11 +14,10 @@ * limitations under the License. */ -#include - #include #include #include +#include #include #include #include From b6b720aaffa42c2b3380befa173d73bd384b4cca Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 21 Sep 2023 16:42:25 -0700 Subject: [PATCH 094/100] Fix docs --- cpp/include/cudf/reduction/detail/reduction_functions.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index f013f91c5c3..704332c8e1d 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -137,8 +137,6 @@ std::unique_ptr all(column_view const& col, * The result histogram is stored in structs column having two children. The first child contains * unique elements from the input, and the second child contains their corresponding frequencies. * - * @throw cudf::logic_error if `output_dtype` is not integer type - * * @param input The column to compute histogram * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned scalar's device memory From b30f70c28c2d6f000da6d434c57d3f41f610ad07 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 14:01:57 -0700 Subject: [PATCH 095/100] Move detail header file, use `std::invalid_argument`, and some comments --- .../histogram.hpp} | 6 ++-- cpp/src/groupby/groupby.cu | 2 +- cpp/src/groupby/sort/group_histogram.cu | 25 +++++++------ cpp/src/reductions/histogram.cu | 36 +++++++++---------- cpp/src/reductions/reductions.cpp | 2 +- 5 files changed, 37 insertions(+), 34 deletions(-) rename cpp/include/cudf/detail/{histogram_helpers.hpp => reduction/histogram.hpp} (87%) diff --git a/cpp/include/cudf/detail/histogram_helpers.hpp b/cpp/include/cudf/detail/reduction/histogram.hpp similarity index 87% rename from cpp/include/cudf/detail/histogram_helpers.hpp rename to cpp/include/cudf/detail/reduction/histogram.hpp index fc5965e92f9..97c711fda4e 100644 --- a/cpp/include/cudf/detail/histogram_helpers.hpp +++ b/cpp/include/cudf/detail/reduction/histogram.hpp @@ -29,7 +29,7 @@ namespace cudf::reduction::detail { /** - * @brief Compute the frequency for each unique row in the input table. + * @brief Compute the frequency for each distinct row in the input table. * * @param input The input table to compute histogram * @param partial_counts An optional column containing count for each row @@ -38,7 +38,7 @@ namespace cudf::reduction::detail { * @return A pair of array contains the (stable-order) indices of the distinct rows in the input * table, and their corresponding distinct counts */ -std::pair>, std::unique_ptr> +[[nodiscard]] std::pair>, std::unique_ptr> compute_row_frequencies(table_view const& input, std::optional const& partial_counts, rmm::cuda_stream_view stream, @@ -52,6 +52,6 @@ compute_row_frequencies(table_view const& input, * * @returns An empty histogram column */ -std::unique_ptr make_empty_histogram_like(column_view const& values); +[[nodiscard]] std::unique_ptr make_empty_histogram_like(column_view const& values); } // namespace cudf::reduction::detail diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e1406881569..b52f99f8a3d 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 28b4a3154ca..c50edca4e85 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -19,8 +19,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -40,9 +40,9 @@ std::unique_ptr build_histogram(column_view const& values, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - CUDF_EXPECTS(num_groups >= 0, "Number of groups cannot be negative."); CUDF_EXPECTS(static_cast(values.size()) == group_labels.size(), - "Size of values column should be the same as that of group labels."); + "Size of values column should be the same as that of group labels.", + std::invalid_argument); // Attach group labels to the input values. auto const labels_cv = column_view{data_type{type_to_id()}, @@ -92,7 +92,7 @@ std::unique_ptr group_histogram(column_view const& values, rmm::mr::device_memory_resource* mr) { // Empty group should be handled before reaching here. - CUDF_EXPECTS(num_groups > 0, "Group should not be empty."); + CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr); } @@ -104,23 +104,28 @@ std::unique_ptr group_merge_histogram(column_view const& values, rmm::mr::device_memory_resource* mr) { // Empty group should be handled before reaching here. - CUDF_EXPECTS(num_groups > 0, "Group should not be empty."); + CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument); // The input must be a lists column without nulls. - CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls.", std::invalid_argument); CUDF_EXPECTS(values.type().id() == type_id::LIST, - "The input of MERGE_HISTOGRAM aggregation must be a lists column."); + "The input of MERGE_HISTOGRAM aggregation must be a lists column.", + std::invalid_argument); // Child of the input lists column must be a structs column without nulls, // and its second child is a columns of integer type having no nulls. auto const lists_cv = lists_column_view{values}; auto const histogram_cv = lists_cv.get_sliced_child(stream); - CUDF_EXPECTS(!histogram_cv.has_nulls(), "Child of the input lists column must not have nulls."); + CUDF_EXPECTS(!histogram_cv.has_nulls(), + "Child of the input lists column must not have nulls.", + std::invalid_argument); CUDF_EXPECTS(histogram_cv.type().id() == type_id::STRUCT && histogram_cv.num_children() == 2, - "The input column has invalid histograms structure."); + "The input column has invalid histograms structure.", + std::invalid_argument); CUDF_EXPECTS( cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(), - "The input column has invalid histograms structure."); + "The input column has invalid histograms structure.", + std::invalid_argument); // Concatenate the histograms corresponding to the same key values. // That is equivalent to creating a new lists column (view) from the input lists column diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 7d8b432a5e6..7daf995cee3 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -92,9 +91,9 @@ struct reduce_func_builder { }; /** - * @brief Specialized functor to check for non-zero of the second component of the input. + * @brief Specialized functor to check for not-zero of the second component of the input. */ -struct is_none_zero { +struct is_not_zero { template __device__ bool operator()(Pair const input) const { @@ -119,18 +118,15 @@ auto gather_histogram(table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - auto distinct_rows = - std::move(cudf::detail::gather(input, - distinct_indices, - out_of_bounds_policy::DONT_CHECK, - cudf::detail::negative_index_policy::NOT_ALLOWED, - stream, - mr) - ->release() - .front()); + auto distinct_rows = cudf::detail::gather(input, + distinct_indices, + out_of_bounds_policy::DONT_CHECK, + cudf::detail::negative_index_policy::NOT_ALLOWED, + stream, + mr); std::vector> struct_children; - struct_children.emplace_back(std::move(distinct_rows)); + struct_children.emplace_back(std::move(distinct_rows->release().front())); struct_children.emplace_back(std::move(distinct_counts)); auto output_structs = make_structs_column( static_cast(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr); @@ -227,7 +223,7 @@ compute_row_frequencies(table_view const& input, // Reduction results above are either group sizes of equal rows, or `0`. // The final output is non-zero group sizes only. thrust::copy_if( - rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_none_zero{}); + rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_not_zero{}); return {std::move(distinct_indices), std::move(distinct_counts)}; } @@ -237,7 +233,7 @@ std::unique_ptr histogram(column_view const& input, rmm::mr::device_memory_resource* mr) { // Empty group should be handled before reaching here. - CUDF_EXPECTS(input.size() > 0, "Input should not be empty."); + CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument); auto const input_tv = table_view{{input}}; auto [distinct_indices, distinct_counts] = @@ -250,12 +246,14 @@ std::unique_ptr merge_histogram(column_view const& input, rmm::mr::device_memory_resource* mr) { // Empty group should be handled before reaching here. - CUDF_EXPECTS(input.size() > 0, "Input should not be empty."); - CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls."); + CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument); + CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls.", std::invalid_argument); CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2, - "The input must be a structs column having two children."); + "The input must be a structs column having two children.", + std::invalid_argument); CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(), - "The second child of the input column must be integral type and has no nulls."); + "The second child of the input column must be of integral type and without nulls.", + std::invalid_argument); auto const structs_cv = structs_column_view{input}; auto const input_values = structs_cv.get_sliced_child(0, stream); diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 31f9948a02f..bf01161a456 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -17,9 +17,9 @@ #include #include #include -#include #include #include +#include #include #include #include From 89f36287983ee5dbf6ad884f7d64aa5bb1c2cf33 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 14:31:06 -0700 Subject: [PATCH 096/100] Move header file and fix comment --- .../cudf/{detail/reduction => reduction/detail}/histogram.hpp | 0 cpp/src/groupby/groupby.cu | 2 +- cpp/src/groupby/sort/group_histogram.cu | 2 +- cpp/src/reductions/histogram.cu | 2 +- cpp/src/reductions/reductions.cpp | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename cpp/include/cudf/{detail/reduction => reduction/detail}/histogram.hpp (100%) diff --git a/cpp/include/cudf/detail/reduction/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp similarity index 100% rename from cpp/include/cudf/detail/reduction/histogram.hpp rename to cpp/include/cudf/reduction/detail/histogram.hpp diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index b52f99f8a3d..e3c021eb66a 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -25,10 +25,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index c50edca4e85..bb70037aaef 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 7daf995cee3..d5056a82959 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -199,7 +199,7 @@ compute_row_frequencies(table_view const& input, stream, mr); - // Compute frequencies (aka unique counts) for the input rows. + // Compute frequencies (aka distinct counts) for the input rows. // Note that we consider null and NaNs as always equal. auto const reduction_results = cudf::detail::hash_reduce_by_row( map, diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index bf01161a456..23171baaa45 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -19,11 +19,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include From c3ad10461b4f2e9aabb12b5e69a950a7c1314cd7 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 14:43:02 -0700 Subject: [PATCH 097/100] Append enum --- cpp/include/cudf/aggregation.hpp | 72 ++++++++++++++++---------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp index 75d693df9e0..d458c831f19 100644 --- a/cpp/include/cudf/aggregation.hpp +++ b/cpp/include/cudf/aggregation.hpp @@ -83,42 +83,42 @@ class aggregation { * @brief Possible aggregation operations */ enum Kind { - SUM, ///< sum reduction - PRODUCT, ///< product reduction - MIN, ///< min reduction - MAX, ///< max reduction - COUNT_VALID, ///< count number of valid elements - COUNT_ALL, ///< count number of elements - HISTOGRAM, ///< compute frequency of each element - ANY, ///< any reduction - ALL, ///< all reduction - SUM_OF_SQUARES, ///< sum of squares reduction - MEAN, ///< arithmetic mean reduction - M2, ///< sum of squares of differences from the mean - VARIANCE, ///< variance - STD, ///< standard deviation - MEDIAN, ///< median reduction - QUANTILE, ///< compute specified quantile(s) - ARGMAX, ///< Index of max element - ARGMIN, ///< Index of min element - NUNIQUE, ///< count number of unique elements - NTH_ELEMENT, ///< get the nth element - ROW_NUMBER, ///< get row-number of current index (relative to rolling window) - RANK, ///< get rank of current index - COLLECT_LIST, ///< collect values into a list - COLLECT_SET, ///< collect values into a list without duplicate entries - LEAD, ///< window function, accesses row at specified offset following current row - LAG, ///< window function, accesses row at specified offset preceding current row - PTX, ///< PTX UDF based reduction - CUDA, ///< CUDA UDF based reduction - MERGE_LISTS, ///< merge multiple lists values into one list - MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries - MERGE_M2, ///< merge partial values of M2 aggregation, - MERGE_HISTOGRAM, ///< merge partial values of HISTOGRAM aggregation, - COVARIANCE, ///< covariance between two sets of elements - CORRELATION, ///< correlation between two sets of elements - TDIGEST, ///< create a tdigest from a set of input values - MERGE_TDIGEST ///< create a tdigest by merging multiple tdigests together + SUM, ///< sum reduction + PRODUCT, ///< product reduction + MIN, ///< min reduction + MAX, ///< max reduction + COUNT_VALID, ///< count number of valid elements + COUNT_ALL, ///< count number of elements + ANY, ///< any reduction + ALL, ///< all reduction + SUM_OF_SQUARES, ///< sum of squares reduction + MEAN, ///< arithmetic mean reduction + M2, ///< sum of squares of differences from the mean + VARIANCE, ///< variance + STD, ///< standard deviation + MEDIAN, ///< median reduction + QUANTILE, ///< compute specified quantile(s) + ARGMAX, ///< Index of max element + ARGMIN, ///< Index of min element + NUNIQUE, ///< count number of unique elements + NTH_ELEMENT, ///< get the nth element + ROW_NUMBER, ///< get row-number of current index (relative to rolling window) + RANK, ///< get rank of current index + COLLECT_LIST, ///< collect values into a list + COLLECT_SET, ///< collect values into a list without duplicate entries + LEAD, ///< window function, accesses row at specified offset following current row + LAG, ///< window function, accesses row at specified offset preceding current row + PTX, ///< PTX UDF based reduction + CUDA, ///< CUDA UDF based reduction + MERGE_LISTS, ///< merge multiple lists values into one list + MERGE_SETS, ///< merge multiple lists values into one list then drop duplicate entries + MERGE_M2, ///< merge partial values of M2 aggregation, + COVARIANCE, ///< covariance between two sets of elements + CORRELATION, ///< correlation between two sets of elements + TDIGEST, ///< create a tdigest from a set of input values + MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together + HISTOGRAM, ///< compute frequency of each element + MERGE_HISTOGRAM ///< merge partial values of HISTOGRAM aggregation, }; aggregation() = delete; From f504c86bf69548a22595964f28e7015ed4a317d9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 15:14:39 -0700 Subject: [PATCH 098/100] Redeclare `hash_table_allocator_type` --- cpp/include/cudf/detail/hash_reduce_by_row.cuh | 4 ++++ cpp/src/reductions/histogram.cu | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index 2d2b43f1d4a..e07b311a7a8 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -16,10 +16,13 @@ #include #include +#include +#include #include #include #include +#include #include #include @@ -29,6 +32,7 @@ namespace cudf::detail { +using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor>; using hash_map_type = cuco::static_map; diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index d5056a82959..459d9d5e436 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -14,8 +14,6 @@ * limitations under the License. */ -#include - #include #include #include From e9d723e64b73fa3a77167472cbdc1c6e0ba17351 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 16:12:02 -0700 Subject: [PATCH 099/100] Temporarily fail on nested input --- cpp/src/reductions/histogram.cu | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 459d9d5e436..fa84bbeb25d 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -154,6 +154,14 @@ compute_row_frequencies(table_view const& input, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { + auto const has_nested_columns = cudf::detail::has_nested_columns(input); + + // Nested types are not tested, thus we just throw exception if we see such input for now. + // We should remove this check after having enough tests. + CUDF_EXPECTS(!has_nested_columns, + "Nested types are not yet supported in histogram aggregation.", + std::invalid_argument); + auto map = cudf::detail::hash_map_type{ compute_hash_table_size(input.num_rows()), cuco::empty_key{-1}, @@ -163,8 +171,7 @@ compute_row_frequencies(table_view const& input, auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); - auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; - auto const has_nested_columns = cudf::detail::has_nested_columns(input); + auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); auto const key_hasher = row_hasher.device_hasher(has_nulls); From 83b8a37c727a022999f85b27f53c488ffc6f995c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 26 Sep 2023 19:47:20 -0700 Subject: [PATCH 100/100] Fix compile issue due to merge conflict --- cpp/include/cudf/detail/hash_reduce_by_row.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index e07b311a7a8..f63d1922950 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -14,10 +14,10 @@ * limitations under the License. */ +#include +#include #include #include -#include -#include #include #include