diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9656bc40fd7..ec58c391001 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -323,6 +323,7 @@ add_library(
src/groupby/sort/group_collect.cu
src/groupby/sort/group_correlation.cu
src/groupby/sort/group_count.cu
+ src/groupby/sort/group_histogram.cu
src/groupby/sort/group_m2.cu
src/groupby/sort/group_max.cu
src/groupby/sort/group_min.cu
@@ -471,6 +472,7 @@ add_library(
src/reductions/all.cu
src/reductions/any.cu
src/reductions/collect_ops.cu
+ src/reductions/histogram.cu
src/reductions/max.cu
src/reductions/mean.cu
src/reductions/min.cu
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index d319041f8b1..d458c831f19 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -116,7 +116,9 @@ class aggregation {
COVARIANCE, ///< covariance between two sets of elements
CORRELATION, ///< correlation between two sets of elements
TDIGEST, ///< create a tdigest from a set of input values
- MERGE_TDIGEST ///< create a tdigest by merging multiple tdigests together
+ MERGE_TDIGEST, ///< create a tdigest by merging multiple tdigests together
+ HISTOGRAM, ///< compute frequency of each element
+ MERGE_HISTOGRAM ///< merge partial values of HISTOGRAM aggregation
};
aggregation() = delete;
@@ -288,6 +290,11 @@ std::unique_ptr make_any_aggregation();
template
std::unique_ptr make_all_aggregation();
+/// Factory to create a HISTOGRAM aggregation
+/// @return A HISTOGRAM aggregation object
+template
+std::unique_ptr make_histogram_aggregation();
+
/// Factory to create a SUM_OF_SQUARES aggregation
/// @return A SUM_OF_SQUARES aggregation object
template
@@ -610,6 +617,17 @@ std::unique_ptr make_merge_sets_aggregation(
template
std::unique_ptr make_merge_m2_aggregation();
+/**
+ * @brief Factory to create a MERGE_HISTOGRAM aggregation
+ *
+ * Merges the results of `HISTOGRAM` aggregations on independent sets into a new `HISTOGRAM` value
+ * equivalent to the result of a single `HISTOGRAM` aggregation over all of the sets at once.
+ *
+ * @return A MERGE_HISTOGRAM aggregation object
+ */
+template
+std::unique_ptr make_merge_histogram_aggregation();
+
/**
* @brief Factory to create a COVARIANCE aggregation
*
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index 4d3984cab93..784f05a964e 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -45,6 +45,8 @@ class simple_aggregations_collector { // Declares the interface for the simple
class max_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
class count_aggregation const& agg);
+ virtual std::vector> visit(data_type col_type,
+ class histogram_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
class any_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
@@ -89,6 +91,8 @@ class simple_aggregations_collector { // Declares the interface for the simple
class merge_sets_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
class merge_m2_aggregation const& agg);
+ virtual std::vector> visit(
+ data_type col_type, class merge_histogram_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
class covariance_aggregation const& agg);
virtual std::vector> visit(data_type col_type,
@@ -108,6 +112,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer
virtual void visit(class min_aggregation const& agg);
virtual void visit(class max_aggregation const& agg);
virtual void visit(class count_aggregation const& agg);
+ virtual void visit(class histogram_aggregation const& agg);
virtual void visit(class any_aggregation const& agg);
virtual void visit(class all_aggregation const& agg);
virtual void visit(class sum_of_squares_aggregation const& agg);
@@ -130,6 +135,7 @@ class aggregation_finalizer { // Declares the interface for the finalizer
virtual void visit(class merge_lists_aggregation const& agg);
virtual void visit(class merge_sets_aggregation const& agg);
virtual void visit(class merge_m2_aggregation const& agg);
+ virtual void visit(class merge_histogram_aggregation const& agg);
virtual void visit(class covariance_aggregation const& agg);
virtual void visit(class correlation_aggregation const& agg);
virtual void visit(class tdigest_aggregation const& agg);
@@ -251,6 +257,25 @@ class count_aggregation final : public rolling_aggregation,
void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
};
+/**
+ * @brief Derived groupby/reduce aggregation class for specifying HISTOGRAM aggregation
+ */
+class histogram_aggregation final : public groupby_aggregation, public reduce_aggregation {
+ public:
+ histogram_aggregation() : aggregation(HISTOGRAM) {}  // base is initialized with kind HISTOGRAM
+
+ [[nodiscard]] std::unique_ptr clone() const override
+ {
+ return std::make_unique(*this);  // new object copy-constructed from *this
+ }
+ std::vector> get_simple_aggregations(
+ data_type col_type, simple_aggregations_collector& collector) const override
+ {
+ return collector.visit(col_type, *this);  // forwards to the collector's visit overload
+ }
+ void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
/**
* @brief Derived class for specifying an any aggregation
*/
@@ -972,6 +997,25 @@ class merge_m2_aggregation final : public groupby_aggregation {
void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
};
+/**
+ * @brief Derived groupby/reduce aggregation class for specifying MERGE_HISTOGRAM aggregation
+ */
+class merge_histogram_aggregation final : public groupby_aggregation, public reduce_aggregation {
+ public:
+ explicit merge_histogram_aggregation() : aggregation{MERGE_HISTOGRAM} {}  // kind MERGE_HISTOGRAM
+
+ [[nodiscard]] std::unique_ptr clone() const override
+ {
+ return std::make_unique(*this);  // new object copy-constructed from *this
+ }
+ std::vector> get_simple_aggregations(
+ data_type col_type, simple_aggregations_collector& collector) const override
+ {
+ return collector.visit(col_type, *this);  // forwards to the collector's visit overload
+ }
+ void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
/**
* @brief Derived aggregation class for specifying COVARIANCE aggregation
*/
@@ -1148,6 +1192,12 @@ struct target_type_impl