diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 9656bc40fd7..ec58c391001 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -323,6 +323,7 @@ add_library(
   src/groupby/sort/group_collect.cu
   src/groupby/sort/group_correlation.cu
   src/groupby/sort/group_count.cu
+  src/groupby/sort/group_histogram.cu
   src/groupby/sort/group_m2.cu
   src/groupby/sort/group_max.cu
   src/groupby/sort/group_min.cu
@@ -471,6 +472,7 @@ add_library(
   src/reductions/all.cu
   src/reductions/any.cu
   src/reductions/collect_ops.cu
+  src/reductions/histogram.cu
   src/reductions/max.cu
   src/reductions/mean.cu
   src/reductions/min.cu
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index d319041f8b1..d458c831f19 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -116,7 +116,9 @@ class aggregation {
     COVARIANCE,      ///< covariance between two sets of elements
     CORRELATION,     ///< correlation between two sets of elements
     TDIGEST,         ///< create a tdigest from a set of input values
-    MERGE_TDIGEST    ///< create a tdigest by merging multiple tdigests together
+    MERGE_TDIGEST,   ///< create a tdigest by merging multiple tdigests together
+    HISTOGRAM,       ///< compute frequency of each element
+    MERGE_HISTOGRAM  ///< merge partial values of HISTOGRAM aggregation,
   };
 
   aggregation() = delete;
@@ -288,6 +290,11 @@ std::unique_ptr<Base> make_any_aggregation();
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_all_aggregation();
 
+/// Factory to create a HISTOGRAM aggregation
+/// @return A HISTOGRAM aggregation object
+template <typename Base = aggregation>
+std::unique_ptr<Base> make_histogram_aggregation();
+
 /// Factory to create a SUM_OF_SQUARES aggregation
 /// @return A SUM_OF_SQUARES aggregation object
 template <typename Base = aggregation>
@@ -610,6 +617,17 @@ std::unique_ptr<Base> make_merge_sets_aggregation(
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_merge_m2_aggregation();
 
+/**
+ * @brief Factory to create a MERGE_HISTOGRAM aggregation
+ *
+ * Merges the results of `HISTOGRAM` aggregations on independent sets into a new `HISTOGRAM` value
+ * equivalent to if a single `HISTOGRAM` aggregation was done across all of the sets at once.
+ *
+ * @return A MERGE_HISTOGRAM aggregation object
+ */
+template <typename Base = aggregation>
+std::unique_ptr<Base> make_merge_histogram_aggregation();
+
 /**
  * @brief Factory to create a COVARIANCE aggregation
  *
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index 4d3984cab93..784f05a964e 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -45,6 +45,8 @@ class simple_aggregations_collector {  // Declares the interface for the simple
                                                           class max_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class count_aggregation const& agg);
+  virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
+                                                          class histogram_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class any_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
@@ -89,6 +91,8 @@ class simple_aggregations_collector {  // Declares the interface for the simple
                                                           class merge_sets_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class merge_m2_aggregation const& agg);
+  virtual std::vector<std::unique_ptr<aggregation>> visit(
+    data_type col_type, class merge_histogram_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class covariance_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
@@ -108,6 +112,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class min_aggregation const& agg);
   virtual void visit(class max_aggregation const& agg);
   virtual void visit(class count_aggregation const& agg);
+  virtual void visit(class histogram_aggregation const& agg);
   virtual void visit(class any_aggregation const& agg);
   virtual void visit(class all_aggregation const& agg);
   virtual void visit(class sum_of_squares_aggregation const& agg);
@@ -130,6 +135,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class merge_lists_aggregation const& agg);
   virtual void visit(class merge_sets_aggregation const& agg);
   virtual void visit(class merge_m2_aggregation const& agg);
+  virtual void visit(class merge_histogram_aggregation const& agg);
   virtual void visit(class covariance_aggregation const& agg);
   virtual void visit(class correlation_aggregation const& agg);
   virtual void visit(class tdigest_aggregation const& agg);
@@ -251,6 +257,25 @@ class count_aggregation final : public rolling_aggregation,
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
 };
 
+/**
+ * @brief Derived class for specifying a histogram aggregation
+ */
+class histogram_aggregation final : public groupby_aggregation, public reduce_aggregation {
+ public:
+  histogram_aggregation() : aggregation(HISTOGRAM) {}
+
+  [[nodiscard]] std::unique_ptr<aggregation> clone() const override
+  {
+    return std::make_unique<histogram_aggregation>(*this);
+  }
+  std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
+    data_type col_type, simple_aggregations_collector& collector) const override
+  {
+    return collector.visit(col_type, *this);
+  }
+  void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
 /**
  * @brief Derived class for specifying an any aggregation
  */
@@ -972,6 +997,25 @@ class merge_m2_aggregation final : public groupby_aggregation {
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
 };
 
+/**
+ * @brief Derived aggregation class for specifying MERGE_HISTOGRAM aggregation
+ */
+class merge_histogram_aggregation final : public groupby_aggregation, public reduce_aggregation {
+ public:
+  explicit merge_histogram_aggregation() : aggregation{MERGE_HISTOGRAM} {}
+
+  [[nodiscard]] std::unique_ptr<aggregation> clone() const override
+  {
+    return std::make_unique<merge_histogram_aggregation>(*this);
+  }
+  std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
+    data_type col_type, simple_aggregations_collector& collector) const override
+  {
+    return collector.visit(col_type, *this);
+  }
+  void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
 /**
  * @brief Derived aggregation class for specifying COVARIANCE aggregation
  */
@@ -1148,6 +1192,12 @@ struct target_type_impl<Source, aggregation::COUNT_ALL> {
   using type = size_type;
 };
 
+// Use list for HISTOGRAM
+template <typename SourceType>
+struct target_type_impl<SourceType, aggregation::HISTOGRAM> {
+  using type = list_view;
+};
+
 // Computing ANY of any type, use bool accumulator
 template <typename Source>
 struct target_type_impl<Source, aggregation::ANY> {
@@ -1326,6 +1376,12 @@ struct target_type_impl<SourceType, aggregation::MERGE_M2> {
   using type = struct_view;
 };
 
+// Use list for MERGE_HISTOGRAM
+template <typename SourceType>
+struct target_type_impl<SourceType, aggregation::MERGE_HISTOGRAM> {
+  using type = list_view;
+};
+
 // Always use double for COVARIANCE
 template <typename SourceType>
 struct target_type_impl<SourceType, aggregation::COVARIANCE> {
@@ -1417,6 +1473,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind
       return f.template operator()<aggregation::COUNT_VALID>(std::forward<Ts>(args)...);
     case aggregation::COUNT_ALL:
       return f.template operator()<aggregation::COUNT_ALL>(std::forward<Ts>(args)...);
+    case aggregation::HISTOGRAM:
+      return f.template operator()<aggregation::HISTOGRAM>(std::forward<Ts>(args)...);
     case aggregation::ANY:
       return f.template operator()<aggregation::ANY>(std::forward<Ts>(args)...);
     case aggregation::ALL:
@@ -1460,6 +1518,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind
       return f.template operator()<aggregation::MERGE_SETS>(std::forward<Ts>(args)...);
     case aggregation::MERGE_M2:
       return f.template operator()<aggregation::MERGE_M2>(std::forward<Ts>(args)...);
+    case aggregation::MERGE_HISTOGRAM:
+      return f.template operator()<aggregation::MERGE_HISTOGRAM>(std::forward<Ts>(args)...);
     case aggregation::COVARIANCE:
       return f.template operator()<aggregation::COVARIANCE>(std::forward<Ts>(args)...);
     case aggregation::CORRELATION:
diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh
index 2d2b43f1d4a..f63d1922950 100644
--- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh
+++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh
@@ -14,12 +14,15 @@
  * limitations under the License.
  */
 
+#include <cudf/hashing/detail/hash_allocator.cuh>
+#include <cudf/hashing/detail/helper_functions.cuh>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/types.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
+#include <rmm/mr/device/polymorphic_allocator.hpp>
 
 #include <thrust/for_each.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -29,6 +32,7 @@
 
 namespace cudf::detail {
 
+using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
 using hash_map_type =
   cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
 
diff --git a/cpp/include/cudf/reduction/detail/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp
new file mode 100644
index 00000000000..97c711fda4e
--- /dev/null
+++ b/cpp/include/cudf/reduction/detail/histogram.hpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <memory>
+#include <optional>
+
+namespace cudf::reduction::detail {
+
+/**
+ * @brief Compute the frequency for each distinct row in the input table.
+ *
+ * @param input The input table to compute histogram
+ * @param partial_counts An optional column containing count for each row
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate memory of the returned objects
+ * @return A pair of array contains the (stable-order) indices of the distinct rows in the input
+ * table, and their corresponding distinct counts
+ */
+[[nodiscard]] std::pair<std::unique_ptr<rmm::device_uvector<size_type>>, std::unique_ptr<column>>
+compute_row_frequencies(table_view const& input,
+                        std::optional<column_view> const& partial_counts,
+                        rmm::cuda_stream_view stream,
+                        rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Create an empty histogram column.
+ *
+ * A histogram column is a structs column `STRUCT<T, int64_t>` where T is type of the input
+ * values.
+ *
+ * @returns An empty histogram column
+ */
+[[nodiscard]] std::unique_ptr<column> make_empty_histogram_like(column_view const& values);
+
+}  // namespace cudf::reduction::detail
diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
index 014a6ba70eb..704332c8e1d 100644
--- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp
@@ -131,6 +131,33 @@ std::unique_ptr<scalar> all(column_view const& col,
                             rmm::cuda_stream_view stream,
                             rmm::mr::device_memory_resource* mr);
 
+/**
+ * @brief Compute frequency for each unique element in the input column.
+ *
+ * The result histogram is stored in structs column having two children. The first child contains
+ * unique elements from the input, and the second child contains their corresponding frequencies.
+ *
+ * @param input The column to compute histogram
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return A list_scalar storing a structs column as the result histogram
+ */
+std::unique_ptr<scalar> histogram(column_view const& input,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Merge multiple histograms together.
+ *
+ * @param input The input given as multiple histograms concatenated together
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
+ * @return A list_scalar storing the result histogram
+ */
+std::unique_ptr<scalar> merge_histogram(column_view const& input,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr);
+
 /**
  * @brief Computes product of elements in input column
  *
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index 2e6a643484e..b3f2a774a60 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -64,6 +64,12 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
 
+std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
+  data_type col_type, histogram_aggregation const& agg)
+{
+  return visit(col_type, static_cast<aggregation const&>(agg));
+}
+
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   data_type col_type, any_aggregation const& agg)
 {
@@ -196,6 +202,12 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
 
+std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
+  data_type col_type, merge_histogram_aggregation const& agg)
+{
+  return visit(col_type, static_cast<aggregation const&>(agg));
+}
+
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   data_type col_type, covariance_aggregation const& agg)
 {
@@ -246,6 +258,10 @@ void aggregation_finalizer::visit(count_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
 }
+void aggregation_finalizer::visit(histogram_aggregation const& agg)
+{
+  visit(static_cast<aggregation const&>(agg));
+}
 
 void aggregation_finalizer::visit(any_aggregation const& agg)
 {
@@ -357,6 +373,11 @@ void aggregation_finalizer::visit(merge_m2_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }
 
+void aggregation_finalizer::visit(merge_histogram_aggregation const& agg)
+{
+  visit(static_cast<aggregation const&>(agg));
+}
+
 void aggregation_finalizer::visit(covariance_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
@@ -460,6 +481,16 @@ template std::unique_ptr<groupby_aggregation> make_count_aggregation<groupby_agg
 template std::unique_ptr<groupby_scan_aggregation> make_count_aggregation<groupby_scan_aggregation>(
   null_policy null_handling);
 
+/// Factory to create a HISTOGRAM aggregation
+template <typename Base>
+std::unique_ptr<Base> make_histogram_aggregation()
+{
+  return std::make_unique<detail::histogram_aggregation>();
+}
+template std::unique_ptr<aggregation> make_histogram_aggregation<aggregation>();
+template std::unique_ptr<groupby_aggregation> make_histogram_aggregation<groupby_aggregation>();
+template std::unique_ptr<reduce_aggregation> make_histogram_aggregation<reduce_aggregation>();
+
 /// Factory to create a ANY aggregation
 template <typename Base>
 std::unique_ptr<Base> make_any_aggregation()
@@ -764,6 +795,17 @@ std::unique_ptr<Base> make_merge_m2_aggregation()
 template std::unique_ptr<aggregation> make_merge_m2_aggregation<aggregation>();
 template std::unique_ptr<groupby_aggregation> make_merge_m2_aggregation<groupby_aggregation>();
 
+/// Factory to create a MERGE_HISTOGRAM aggregation
+template <typename Base>
+std::unique_ptr<Base> make_merge_histogram_aggregation()
+{
+  return std::make_unique<detail::merge_histogram_aggregation>();
+}
+template std::unique_ptr<aggregation> make_merge_histogram_aggregation<aggregation>();
+template std::unique_ptr<groupby_aggregation>
+make_merge_histogram_aggregation<groupby_aggregation>();
+template std::unique_ptr<reduce_aggregation> make_merge_histogram_aggregation<reduce_aggregation>();
+
 /// Factory to create a COVARIANCE aggregation
 template <typename Base>
 std::unique_ptr<Base> make_covariance_aggregation(size_type min_periods, size_type ddof)
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index ce1fc71968f..e3c021eb66a 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -28,6 +28,7 @@
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/dictionary/dictionary_column_view.hpp>
 #include <cudf/groupby.hpp>
+#include <cudf/reduction/detail/histogram.hpp>
 #include <cudf/strings/string_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
@@ -110,6 +111,15 @@ struct empty_column_constructor {
         0, make_empty_column(type_to_id<size_type>()), empty_like(values), 0, {});
     }
 
+    if constexpr (k == aggregation::Kind::HISTOGRAM) {
+      return make_lists_column(0,
+                               make_empty_column(type_to_id<size_type>()),
+                               cudf::reduction::detail::make_empty_histogram_like(values),
+                               0,
+                               {});
+    }
+    if constexpr (k == aggregation::Kind::MERGE_HISTOGRAM) { return empty_like(values); }
+
     if constexpr (k == aggregation::Kind::RANK) {
       auto const& rank_agg = dynamic_cast<cudf::detail::rank_aggregation const&>(agg);
       if (rank_agg._method == cudf::rank_method::AVERAGE or
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 3f977dc81d7..10c271f76f9 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -89,6 +89,18 @@ void aggregate_result_functor::operator()<aggregation::COUNT_ALL>(aggregation co
     detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 }
 
+template <>
+void aggregate_result_functor::operator()<aggregation::HISTOGRAM>(aggregation const& agg)
+{
+  if (cache.has_result(values, agg)) return;
+
+  cache.add_result(
+    values,
+    agg,
+    detail::group_histogram(
+      get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, mr));
+}
+
 template <>
 void aggregate_result_functor::operator()<aggregation::SUM>(aggregation const& agg)
 {
@@ -534,6 +546,24 @@ void aggregate_result_functor::operator()<aggregation::MERGE_M2>(aggregation con
       get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
 }
 
+/**
+ * @brief Perform merging for multiple histograms that correspond to the same key value.
+ *
+ * The partial results input to this aggregation is a structs column that is concatenated from
+ * multiple outputs of HISTOGRAM aggregations.
+ */
+template <>
+void aggregate_result_functor::operator()<aggregation::MERGE_HISTOGRAM>(aggregation const& agg)
+{
+  if (cache.has_result(values, agg)) { return; }
+
+  cache.add_result(
+    values,
+    agg,
+    detail::group_merge_histogram(
+      get_grouped_values(), helper.group_offsets(stream), helper.num_groups(stream), stream, mr));
+}
+
 /**
  * @brief Creates column views with only valid elements in both input column views
  *
diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu
new file mode 100644
index 00000000000..bb70037aaef
--- /dev/null
+++ b/cpp/src/groupby/sort/group_histogram.cu
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <lists/utilities.hpp>
+
+#include <cudf/aggregation.hpp>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/detail/labeling/label_segments.cuh>
+#include <cudf/reduction/detail/histogram.hpp>
+#include <cudf/structs/structs_column_view.hpp>
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/device_buffer.hpp>
+
+#include <thrust/gather.h>
+
+namespace cudf::groupby::detail {
+
+namespace {
+
+std::unique_ptr<column> build_histogram(column_view const& values,
+                                        cudf::device_span<size_type const> group_labels,
+                                        std::optional<column_view> const& partial_counts,
+                                        size_type num_groups,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_EXPECTS(static_cast<size_t>(values.size()) == group_labels.size(),
+               "Size of values column should be the same as that of group labels.",
+               std::invalid_argument);
+
+  // Attach group labels to the input values.
+  auto const labels_cv      = column_view{data_type{type_to_id<size_type>()},
+                                     static_cast<size_type>(group_labels.size()),
+                                     group_labels.data(),
+                                     nullptr,
+                                     0};
+  auto const labeled_values = table_view{{labels_cv, values}};
+
+  // Build histogram for the labeled values.
+  auto [distinct_indices, distinct_counts] =
+    cudf::reduction::detail::compute_row_frequencies(labeled_values, partial_counts, stream, mr);
+
+  // Gather the distinct rows for the output histogram.
+  auto out_table = cudf::detail::gather(labeled_values,
+                                        *distinct_indices,
+                                        out_of_bounds_policy::DONT_CHECK,
+                                        cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                        stream,
+                                        mr);
+
+  // Build offsets for the output lists column containing output histograms.
+  // Each list will be a histogram corresponding to one value group.
+  auto out_offsets = cudf::lists::detail::reconstruct_offsets(
+    out_table->get_column(0).view(), num_groups, stream, mr);
+
+  std::vector<std::unique_ptr<column>> struct_children;
+  struct_children.emplace_back(std::move(out_table->release().back()));
+  struct_children.emplace_back(std::move(distinct_counts));
+  auto out_structs = make_structs_column(static_cast<size_type>(distinct_indices->size()),
+                                         std::move(struct_children),
+                                         0,
+                                         {},
+                                         stream,
+                                         mr);
+
+  return make_lists_column(
+    num_groups, std::move(out_offsets), std::move(out_structs), 0, {}, stream, mr);
+}
+
+}  // namespace
+
+std::unique_ptr<column> group_histogram(column_view const& values,
+                                        cudf::device_span<size_type const> group_labels,
+                                        size_type num_groups,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  // Empty group should be handled before reaching here.
+  CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument);
+
+  return build_histogram(values, group_labels, std::nullopt, num_groups, stream, mr);
+}
+
+std::unique_ptr<column> group_merge_histogram(column_view const& values,
+                                              cudf::device_span<size_type const> group_offsets,
+                                              size_type num_groups,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  // Empty group should be handled before reaching here.
+  CUDF_EXPECTS(num_groups > 0, "Group should not be empty.", std::invalid_argument);
+
+  // The input must be a lists column without nulls.
+  CUDF_EXPECTS(!values.has_nulls(), "The input column must not have nulls.", std::invalid_argument);
+  CUDF_EXPECTS(values.type().id() == type_id::LIST,
+               "The input of MERGE_HISTOGRAM aggregation must be a lists column.",
+               std::invalid_argument);
+
+  // Child of the input lists column must be a structs column without nulls,
+  // and its second child is a columns of integer type having no nulls.
+  auto const lists_cv     = lists_column_view{values};
+  auto const histogram_cv = lists_cv.get_sliced_child(stream);
+  CUDF_EXPECTS(!histogram_cv.has_nulls(),
+               "Child of the input lists column must not have nulls.",
+               std::invalid_argument);
+  CUDF_EXPECTS(histogram_cv.type().id() == type_id::STRUCT && histogram_cv.num_children() == 2,
+               "The input column has invalid histograms structure.",
+               std::invalid_argument);
+  CUDF_EXPECTS(
+    cudf::is_integral(histogram_cv.child(1).type()) && !histogram_cv.child(1).has_nulls(),
+    "The input column has invalid histograms structure.",
+    std::invalid_argument);
+
+  // Concatenate the histograms corresponding to the same key values.
+  // That is equivalent to creating a new lists column (view) from the input lists column
+  // with new offsets gathered as below.
+  auto new_offsets = rmm::device_uvector<size_type>(num_groups + 1, stream);
+  thrust::gather(rmm::exec_policy(stream),
+                 group_offsets.begin(),
+                 group_offsets.end(),
+                 lists_cv.offsets_begin(),
+                 new_offsets.begin());
+
+  // Generate labels for the new lists.
+  auto key_labels = rmm::device_uvector<size_type>(histogram_cv.size(), stream);
+  cudf::detail::label_segments(
+    new_offsets.begin(), new_offsets.end(), key_labels.begin(), key_labels.end(), stream);
+
+  auto const structs_cv   = structs_column_view{histogram_cv};
+  auto const input_values = structs_cv.get_sliced_child(0, stream);
+  auto const input_counts = structs_cv.get_sliced_child(1, stream);
+
+  return build_histogram(input_values, key_labels, input_counts, num_groups, stream, mr);
+}
+
+}  // namespace cudf::groupby::detail
diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp
index fc24b679db5..3aa79f226a3 100644
--- a/cpp/src/groupby/sort/group_reductions.hpp
+++ b/cpp/src/groupby/sort/group_reductions.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -216,6 +216,33 @@ std::unique_ptr<column> group_count_all(cudf::device_span<size_type const> group
                                         size_type num_groups,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr);
+/**
+ * @brief Internal API to compute histogram for each group in @p values.
+ *
+ * The returned column is a lists column, each list corresponds to one input group and stores the
+ * histogram of the distinct elements in that group in the form of `STRUCT<value, count>`.
+ *
+ * Note that the order of distinct elements in each output list is not specified.
+ *
+ * @code{.pseudo}
+ * values       = [2, 1, 1, 3, 5, 2, 2, 3, 1, 4]
+ * group_labels = [0, 0, 0, 1, 1, 1, 1, 1, 2, 2]
+ * num_groups   = 3
+ *
+ * output = [[<1, 2>, <2, 1>], [<2, 2>, <3, 2>, <5, 1>], [<1, 1>, <4, 1>]]
+ * @endcode
+ *
+ * @param values Grouped values to compute histogram
+ * @param group_labels ID of group that the corresponding value belongs to
+ * @param num_groups Number of groups
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> group_histogram(column_view const& values,
+                                        cudf::device_span<size_type const> group_labels,
+                                        size_type num_groups,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Internal API to calculate sum of squares of differences from means.
@@ -441,6 +468,34 @@ std::unique_ptr<column> group_merge_m2(column_view const& values,
                                        size_type num_groups,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Internal API to merge multiple output of HISTOGRAM aggregation.
+ *
+ * The input values column should be given as a lists column in the form of
+ * `LIST<STRUCT<value, count>>`.
+ * After merging, the order of distinct elements in each output list is not specified.
+ *
+ * @code{.pseudo}
+ * values        = [ [<1, 2>, <2, 1>], [<2, 2>], [<3, 2>, <2, 1>], [<1, 1>, <2, 1>] ]
+ * group_offsets = [ 0,                          2,                                 4]
+ * num_groups    = 2
+ *
+ * output = [[<1, 2>, <2, 3>], [<1, 1>, <2, 2>, <3, 2>]]]
+ * @endcode
+ *
+ * @param values Grouped values to get valid count of
+ * @param group_offsets Offsets of groups' starting points within @p values
+ * @param num_groups Number of groups
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> group_merge_histogram(column_view const& values,
+                                              cudf::device_span<size_type const> group_offsets,
+                                              size_type num_groups,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr);
+
 /**
  * @brief Internal API to find covariance of child columns of a non-nullable struct column.
  *
diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu
new file mode 100644
index 00000000000..fa84bbeb25d
--- /dev/null
+++ b/cpp/src/reductions/histogram.cu
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/detail/hash_reduce_by_row.cuh>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/structs/structs_column_view.hpp>
+
+#include <thrust/copy.h>
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
+
+#include <cuda/atomic>
+
+#include <optional>
+
+namespace cudf::reduction::detail {
+
+namespace {
+
+// Always use 64-bit signed integer for storing count.
+using histogram_count_type = int64_t;
+
+/**
+ * @brief The functor to accumulate the frequency of each distinct rows in the input table.
+ */
+template <typename MapView, typename KeyHasher, typename KeyEqual, typename CountType>
+struct reduce_fn : cudf::detail::reduce_by_row_fn_base<MapView, KeyHasher, KeyEqual, CountType> {
+  CountType const* d_partial_output;
+
+  reduce_fn(MapView const& d_map,
+            KeyHasher const& d_hasher,
+            KeyEqual const& d_equal,
+            CountType* const d_output,
+            CountType const* const d_partial_output)
+    : cudf::detail::reduce_by_row_fn_base<MapView, KeyHasher, KeyEqual, CountType>{d_map,
+                                                                                   d_hasher,
+                                                                                   d_equal,
+                                                                                   d_output},
+      d_partial_output{d_partial_output}
+  {
+  }
+
+  // Count the number of rows in each group of rows that are compared equal.
+  __device__ void operator()(size_type const idx) const
+  {
+    auto const increment = d_partial_output ? d_partial_output[idx] : CountType{1};
+    auto const count =
+      cuda::atomic_ref<CountType, cuda::thread_scope_device>(*this->get_output_ptr(idx));
+    count.fetch_add(increment, cuda::std::memory_order_relaxed);
+  }
+};
+
+/**
+ * @brief The builder to construct an instance of `reduce_fn` functor.
+ */
+template <typename CountType>
+struct reduce_func_builder {
+  CountType const* const d_partial_output;
+
+  reduce_func_builder(CountType const* const d_partial_output) : d_partial_output{d_partial_output}
+  {
+  }
+
+  template <typename MapView, typename KeyHasher, typename KeyEqual>
+  auto build(MapView const& d_map,
+             KeyHasher const& d_hasher,
+             KeyEqual const& d_equal,
+             CountType* const d_output)
+  {
+    return reduce_fn<MapView, KeyHasher, KeyEqual, CountType>{
+      d_map, d_hasher, d_equal, d_output, d_partial_output};
+  }
+};
+
+/**
+ * @brief Specialized functor to check for not-zero of the second component of the input.
+ */
+struct is_not_zero {
+  template <typename Pair>
+  __device__ bool operator()(Pair const input) const
+  {
+    return thrust::get<1>(input) != 0;
+  }
+};
+
+/**
+ * @brief Building a histogram by gathering distinct rows from the input table and their
+ * corresponding distinct counts.
+ *
+ * @param input The input table
+ * @param distinct_indices Indices of the distinct rows
+ * @param distinct_counts Distinct counts corresponding to the distinct rows
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned object's device memory
+ * @return A list_scalar storing the output histogram
+ */
+auto gather_histogram(table_view const& input,
+                      device_span<size_type const> distinct_indices,
+                      std::unique_ptr<column>&& distinct_counts,
+                      rmm::cuda_stream_view stream,
+                      rmm::mr::device_memory_resource* mr)
+{
+  auto distinct_rows = cudf::detail::gather(input,
+                                            distinct_indices,
+                                            out_of_bounds_policy::DONT_CHECK,
+                                            cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                            stream,
+                                            mr);
+
+  std::vector<std::unique_ptr<column>> struct_children;
+  struct_children.emplace_back(std::move(distinct_rows->release().front()));
+  struct_children.emplace_back(std::move(distinct_counts));
+  auto output_structs = make_structs_column(
+    static_cast<size_type>(distinct_indices.size()), std::move(struct_children), 0, {}, stream, mr);
+
+  return std::make_unique<cudf::list_scalar>(
+    std::move(*output_structs.release()), true, stream, mr);
+}
+
+}  // namespace
+
+std::unique_ptr<column> make_empty_histogram_like(column_view const& values)
+{
+  std::vector<std::unique_ptr<column>> struct_children;
+  struct_children.emplace_back(empty_like(values));
+  struct_children.emplace_back(make_numeric_column(data_type{type_id::INT64}, 0));
+  return std::make_unique<column>(data_type{type_id::STRUCT},
+                                  0,
+                                  rmm::device_buffer{},
+                                  rmm::device_buffer{},
+                                  0,
+                                  std::move(struct_children));
+}
+
+std::pair<std::unique_ptr<rmm::device_uvector<size_type>>, std::unique_ptr<column>>
+compute_row_frequencies(table_view const& input,
+                        std::optional<column_view> const& partial_counts,
+                        rmm::cuda_stream_view stream,
+                        rmm::mr::device_memory_resource* mr)
+{
+  auto const has_nested_columns = cudf::detail::has_nested_columns(input);
+
+  // Nested types are not tested, thus we just throw exception if we see such input for now.
+  // We should remove this check after having enough tests.
+  CUDF_EXPECTS(!has_nested_columns,
+               "Nested types are not yet supported in histogram aggregation.",
+               std::invalid_argument);
+
+  auto map = cudf::detail::hash_map_type{
+    compute_hash_table_size(input.num_rows()),
+    cuco::empty_key{-1},
+    cuco::empty_value{std::numeric_limits<size_type>::min()},
+    cudf::detail::hash_table_allocator_type{default_allocator<char>{}, stream},
+    stream.value()};
+
+  auto const preprocessed_input =
+    cudf::experimental::row::hash::preprocessed_table::create(input, stream);
+  auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)};
+
+  auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input);
+  auto const key_hasher = row_hasher.device_hasher(has_nulls);
+  auto const row_comp   = cudf::experimental::row::equality::self_comparator(preprocessed_input);
+
+  auto const pair_iter = cudf::detail::make_counting_transform_iterator(
+    size_type{0}, [] __device__(size_type const i) { return cuco::make_pair(i, i); });
+
+  // Always compare NaNs as equal.
+  using nan_equal_comparator =
+    cudf::experimental::row::equality::nan_equal_physical_equality_comparator;
+  auto const value_comp = nan_equal_comparator{};
+
+  if (has_nested_columns) {
+    auto const key_equal = row_comp.equal_to<true>(has_nulls, null_equality::EQUAL, value_comp);
+    map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value());
+  } else {
+    auto const key_equal = row_comp.equal_to<false>(has_nulls, null_equality::EQUAL, value_comp);
+    map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value());
+  }
+
+  // Gather the indices of distinct rows.
+  auto distinct_indices = std::make_unique<rmm::device_uvector<size_type>>(
+    static_cast<size_type>(map.get_size()), stream, mr);
+
+  // Store the number of occurrences of each distinct row.
+  auto distinct_counts = make_numeric_column(data_type{type_to_id<histogram_count_type>()},
+                                             static_cast<size_type>(map.get_size()),
+                                             mask_state::UNALLOCATED,
+                                             stream,
+                                             mr);
+
+  // Compute frequencies (aka distinct counts) for the input rows.
+  // Note that we consider null and NaNs as always equal.
+  auto const reduction_results = cudf::detail::hash_reduce_by_row(
+    map,
+    preprocessed_input,
+    input.num_rows(),
+    has_nulls,
+    has_nested_columns,
+    null_equality::EQUAL,
+    nan_equality::ALL_EQUAL,
+    reduce_func_builder<histogram_count_type>{
+      partial_counts ? partial_counts.value().begin<histogram_count_type>() : nullptr},
+    histogram_count_type{0},
+    stream,
+    rmm::mr::get_current_device_resource());
+
+  auto const input_it = thrust::make_zip_iterator(
+    thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin()));
+  auto const output_it = thrust::make_zip_iterator(thrust::make_tuple(
+    distinct_indices->begin(), distinct_counts->mutable_view().begin<histogram_count_type>()));
+
+  // Reduction results above are either group sizes of equal rows, or `0`.
+  // The final output is non-zero group sizes only.
+  thrust::copy_if(
+    rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_not_zero{});
+
+  return {std::move(distinct_indices), std::move(distinct_counts)};
+}
+
+std::unique_ptr<cudf::scalar> histogram(column_view const& input,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  // Empty group should be handled before reaching here.
+  CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument);
+
+  auto const input_tv = table_view{{input}};
+  auto [distinct_indices, distinct_counts] =
+    compute_row_frequencies(input_tv, std::nullopt, stream, mr);
+  return gather_histogram(input_tv, *distinct_indices, std::move(distinct_counts), stream, mr);
+}
+
+std::unique_ptr<cudf::scalar> merge_histogram(column_view const& input,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  // Empty group should be handled before reaching here.
+  CUDF_EXPECTS(input.size() > 0, "Input should not be empty.", std::invalid_argument);
+  CUDF_EXPECTS(!input.has_nulls(), "The input column must not have nulls.", std::invalid_argument);
+  CUDF_EXPECTS(input.type().id() == type_id::STRUCT && input.num_children() == 2,
+               "The input must be a structs column having two children.",
+               std::invalid_argument);
+  CUDF_EXPECTS(cudf::is_integral(input.child(1).type()) && !input.child(1).has_nulls(),
+               "The second child of the input column must be of integral type and without nulls.",
+               std::invalid_argument);
+
+  auto const structs_cv   = structs_column_view{input};
+  auto const input_values = structs_cv.get_sliced_child(0, stream);
+  auto const input_counts = structs_cv.get_sliced_child(1, stream);
+
+  auto const values_tv = table_view{{input_values}};
+  auto [distinct_indices, distinct_counts] =
+    compute_row_frequencies(values_tv, input_counts, stream, mr);
+  return gather_histogram(values_tv, *distinct_indices, std::move(distinct_counts), stream, mr);
+}
+
+}  // namespace cudf::reduction::detail
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 2fef8aa8785..23171baaa45 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -23,6 +23,7 @@
 #include <cudf/detail/stream_compaction.hpp>
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/reduction.hpp>
+#include <cudf/reduction/detail/histogram.hpp>
 #include <cudf/reduction/detail/reduction_functions.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/structs/structs_column_view.hpp>
@@ -59,6 +60,8 @@ struct reduce_dispatch_functor {
       case aggregation::MAX: return max(col, output_dtype, init, stream, mr);
       case aggregation::ANY: return any(col, output_dtype, init, stream, mr);
       case aggregation::ALL: return all(col, output_dtype, init, stream, mr);
+      case aggregation::HISTOGRAM: return histogram(col, stream, mr);
+      case aggregation::MERGE_HISTOGRAM: return merge_histogram(col, stream, mr);
       case aggregation::SUM_OF_SQUARES: return sum_of_squares(col, output_dtype, stream, mr);
       case aggregation::MEAN: return mean(col, output_dtype, stream, mr);
       case aggregation::VARIANCE: {
@@ -165,6 +168,15 @@ std::unique_ptr<scalar> reduce(column_view const& col,
       return tdigest::detail::make_empty_tdigest_scalar(stream, mr);
     }
 
+    if (agg.kind == aggregation::HISTOGRAM) {
+      return std::make_unique<list_scalar>(
+        std::move(*reduction::detail::make_empty_histogram_like(col)), true, stream, mr);
+    }
+    if (agg.kind == aggregation::MERGE_HISTOGRAM) {
+      return std::make_unique<list_scalar>(
+        std::move(*reduction::detail::make_empty_histogram_like(col.child(0))), true, stream, mr);
+    }
+
     if (output_dtype.id() == type_id::LIST) {
       if (col.type() == output_dtype) { return make_empty_scalar_like(col, stream, mr); }
       // Under some circumstance, the output type will become the List of input type,
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 68ff6c54c99..04939f3cd6d 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -116,6 +116,7 @@ ConfigureTest(
   groupby/covariance_tests.cpp
   groupby/groupby_test_util.cpp
   groupby/groups_tests.cpp
+  groupby/histogram_tests.cpp
   groupby/keys_tests.cpp
   groupby/lists_tests.cpp
   groupby/m2_tests.cpp
diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp
new file mode 100644
index 00000000000..c5833f40cf2
--- /dev/null
+++ b/cpp/tests/groupby/histogram_tests.cpp
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/copying.hpp>
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/groupby.hpp>
+#include <cudf/lists/sorting.hpp>
+#include <cudf/sorting.hpp>
+
+using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
+using int64s_col  = cudf::test::fixed_width_column_wrapper<int64_t>;
+using structs_col = cudf::test::structs_column_wrapper;
+
+auto groupby_histogram(cudf::column_view const& keys,
+                       cudf::column_view const& values,
+                       cudf::aggregation::Kind agg_kind)
+{
+  CUDF_EXPECTS(
+    agg_kind == cudf::aggregation::HISTOGRAM || agg_kind == cudf::aggregation::MERGE_HISTOGRAM,
+    "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM.");
+
+  std::vector<cudf::groupby::aggregation_request> requests;
+  requests.emplace_back();
+  requests[0].values = values;
+  if (agg_kind == cudf::aggregation::HISTOGRAM) {
+    requests[0].aggregations.push_back(
+      cudf::make_histogram_aggregation<cudf::groupby_aggregation>());
+  } else {
+    requests[0].aggregations.push_back(
+      cudf::make_merge_histogram_aggregation<cudf::groupby_aggregation>());
+  }
+
+  auto gb_obj              = cudf::groupby::groupby(cudf::table_view({keys}));
+  auto const agg_results   = gb_obj.aggregate(requests, cudf::test::get_default_stream());
+  auto const agg_histogram = agg_results.second[0].results[0]->view();
+  EXPECT_EQ(agg_histogram.type().id(), cudf::type_id::LIST);
+  EXPECT_EQ(agg_histogram.null_count(), 0);
+
+  auto const histograms = cudf::lists_column_view{agg_histogram}.child();
+  EXPECT_EQ(histograms.num_children(), 2);
+  EXPECT_EQ(histograms.null_count(), 0);
+  EXPECT_EQ(histograms.child(1).null_count(), 0);
+
+  auto const key_sort_order = cudf::sorted_order(agg_results.first->view(), {}, {});
+  auto sorted_keys =
+    std::move(cudf::gather(agg_results.first->view(), *key_sort_order)->release().front());
+  auto const sorted_vals =
+    std::move(cudf::gather(cudf::table_view{{agg_histogram}}, *key_sort_order)->release().front());
+  auto sorted_histograms = cudf::lists::sort_lists(cudf::lists_column_view{*sorted_vals},
+                                                   cudf::order::ASCENDING,
+                                                   cudf::null_order::BEFORE,
+                                                   rmm::mr::get_current_device_resource());
+
+  return std::pair{std::move(sorted_keys), std::move(sorted_histograms)};
+}
+
+template <typename T>
+struct GroupbyHistogramTest : public cudf::test::BaseFixture {};
+
+template <typename T>
+struct GroupbyMergeHistogramTest : public cudf::test::BaseFixture {};
+
+// Avoid unsigned types, as the tests below have negative values in their input.
+using HistogramTestTypes = cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t>,
+                                              cudf::test::FloatingPointTypes,
+                                              cudf::test::FixedPointTypes,
+                                              cudf::test::ChronoTypes>;
+TYPED_TEST_SUITE(GroupbyHistogramTest, HistogramTestTypes);
+TYPED_TEST_SUITE(GroupbyMergeHistogramTest, HistogramTestTypes);
+
+TYPED_TEST(GroupbyHistogramTest, EmptyInput)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  auto const keys   = int32s_col{};
+  auto const values = col_data{};
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM);
+
+  // The structure of the output is already verified in the function `groupby_histogram`.
+  ASSERT_EQ(res_histogram->size(), 0);
+}
+
+TYPED_TEST(GroupbyHistogramTest, SimpleInputNoNull)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  // key = 0: values = [2, 2, -3, -2, 2]
+  // key = 1: values = [2, 0, 5, 2, 1]
+  // key = 2: values = [-3, 1, 1, 2, 2]
+  auto const keys   = int32s_col{2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2};
+  auto const values = col_data{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2};
+
+  auto const expected_keys      = int32s_col{0, 1, 2};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2};
+      auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyHistogramTest, SlicedInputNoNull)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  auto const keys_original = int32s_col{2, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 0, 0, 0, 1, 2, 2, 1, 0, 2};
+  auto const values_original =
+    col_data{1, 2, 0, 2, 1, -3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1, 2, 1, 2, 2};
+  // key = 0: values = [2, 2, -3, -2, 2]
+  // key = 1: values = [2, 0, 5, 2, 1]
+  // key = 2: values = [-3, 1, 1, 2, 2]
+  auto const keys   = cudf::slice(keys_original, {5, 20})[0];
+  auto const values = cudf::slice(values_original, {5, 20})[0];
+
+  auto const expected_keys      = int32s_col{0, 1, 2};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{-3, -2, 2, 0, 1, 2, 5, -3, 1, 2};
+      auto counts = int64s_col{1, 1, 3, 1, 1, 2, 1, 1, 2, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      3, int32s_col{0, 3, 7, 10}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyHistogramTest, InputWithNulls)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+
+  // key = 0: values = [-3, null, 2, null, 2]
+  // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1]
+  // key = 2: values = [null, 2, 0, -2, 2, null, 2]
+  auto const keys = int32s_col{2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2};
+  auto const values =
+    col_data{{null, -3, 2, 1, 2, null, 0, 5, 2, null, -3, -2, 2, null, 1, 2, null, 1, 2, 2},
+             nulls_at({0, 5, 9, 13, 16})};
+
+  auto const expected_keys      = int32s_col{0, 1, 2};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, nulls_at({0, 3, 8})};
+      auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyHistogramTest, SlicedInputWithNulls)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+
+  auto const keys_original =
+    int32s_col{1, 0, 2, 2, 0, 2, 0, 2, 1, 1, 1, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 2, 1, 0, 2, 0, 1, 2};
+  auto const values_original =
+    col_data{{null, 1,  1,  2, 1,    null, -3, 2,    1, 2, null, 0,    5, 2,
+              null, -3, -2, 2, null, 1,    2,  null, 1, 2, 2,    null, 1, 2},
+             nulls_at({0, 5, 10, 14, 18, 21, 25})};
+
+  // key = 0: values = [-3, null, 2, null, 2]
+  // key = 1: values = [1, 2, null, 5, 2, -3, 1, 1]
+  // key = 2: values = [null, 2, 0, -2, 2, null, 2]
+  auto const keys   = cudf::slice(keys_original, {5, 25})[0];
+  auto const values = cudf::slice(values_original, {5, 25})[0];
+
+  auto const expected_keys      = int32s_col{0, 1, 2};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{{null, -3, 2, null, -3, 1, 2, 5, null, -2, 0, 2}, nulls_at({0, 3, 8})};
+      auto counts = int64s_col{2, 1, 2, 1, 1, 3, 2, 1, 2, 1, 1, 3};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      3, int32s_col{0, 3, 8, 12}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyMergeHistogramTest, EmptyInput)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  auto const keys   = int32s_col{};
+  auto const values = [] {
+    auto structs = [] {
+      auto values = col_data{};
+      auto counts = int64s_col{};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      0, int32s_col{}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM);
+
+  // The structure of the output is already verified in the function `groupby_histogram`.
+  ASSERT_EQ(res_histogram->size(), 0);
+}
+
+TYPED_TEST(GroupbyMergeHistogramTest, SimpleInputNoNull)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]]
+  // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]]
+  auto const keys   = int32s_col{0, 1, 0, 1, 0};
+  auto const values = [] {
+    auto structs = [] {
+      auto values = col_data{-3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1};
+      auto counts = int64s_col{1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      5, int32s_col{0, 3, 6, 8, 11, 14}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const expected_keys      = int32s_col{0, 1};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2};
+      auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputNoNull)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+
+  // key = 0: histograms = [[<-3, 1>, <-2, 1>, <2, 3>], [<0, 1>, <1, 1>], [<-3, 3>, <0, 1>, <1, 2>]]
+  // key = 1: histograms = [[<-2, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]]
+  auto const keys_original   = int32s_col{0, 1, 0, 1, 0, 1, 0};
+  auto const values_original = [] {
+    auto structs = [] {
+      auto values = col_data{0, 2, -3, 1, -3, -2, 2, -2, 1, 2, 0, 1, 0, 1, 2, -3, 0, 1};
+      auto counts = int64s_col{1, 2, 3, 1, 1, 1, 3, 1, 3, 2, 1, 1, 2, 1, 2, 3, 1, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(7,
+                                   int32s_col{0, 2, 4, 7, 10, 12, 15, 18}.release(),
+                                   structs.release(),
+                                   0,
+                                   rmm::device_buffer{});
+  }();
+  auto const keys   = cudf::slice(keys_original, {2, 7})[0];
+  auto const values = cudf::slice(*values_original, {2, 7})[0];
+
+  auto const expected_keys      = int32s_col{0, 1};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{-3, -2, 0, 1, 2, -2, 0, 1, 2};
+      auto counts = int64s_col{4, 1, 2, 3, 3, 1, 2, 4, 4};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      2, int32s_col{0, 5, 9}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyMergeHistogramTest, InputWithNulls)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+
+  // key = 0: histograms = [[<null, 1>, <2, 3>], [<null, 2>, <1, 1>], [<0, 1>, <1, 2>]]
+  // key = 1: histograms = [[<null, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]]
+  auto const keys   = int32s_col{0, 1, 1, 0, 0};
+  auto const values = [] {
+    auto structs = [] {
+      auto values = col_data{{null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1}, nulls_at({0, 2, 8})};
+      auto counts = int64s_col{1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      5, int32s_col{0, 2, 5, 8, 10, 12}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const expected_keys      = int32s_col{0, 1};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})};
+      auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, *values, cudf::aggregation::MERGE_HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
+
+TYPED_TEST(GroupbyMergeHistogramTest, SlicedInputWithNulls)
+{
+  using col_data = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+
+  // key = 0: histograms = [[<null, 1>, <2, 3>], [<null, 2>, <1, 1>], [<0, 1>, <1, 2>]]
+  // key = 1: histograms = [[<null, 1>, <1, 3>, <2, 2>], [<0, 2>, <1, 1>, <2, 2>]]
+  auto const keys_original   = int32s_col{0, 1, 0, 1, 1, 0, 0};
+  auto const values_original = [] {
+    auto structs = [] {
+      auto values = col_data{{null, 2, null, 1, null, 2, null, 1, 2, 0, 1, 2, null, 1, 0, 1},
+                             nulls_at({0, 2, 4, 6, 12})};
+      auto counts = int64s_col{1, 3, 2, 1, 1, 3, 1, 3, 2, 2, 1, 2, 2, 1, 1, 2};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(7,
+                                   int32s_col{0, 2, 4, 6, 9, 12, 14, 16}.release(),
+                                   structs.release(),
+                                   0,
+                                   rmm::device_buffer{});
+  }();
+  auto const keys   = cudf::slice(keys_original, {2, 7})[0];
+  auto const values = cudf::slice(*values_original, {2, 7})[0];
+
+  auto const expected_keys      = int32s_col{0, 1};
+  auto const expected_histogram = [] {
+    auto structs = [] {
+      auto values = col_data{{null, 0, 1, 2, null, 0, 1, 2}, nulls_at({0, 4})};
+      auto counts = int64s_col{3, 1, 3, 3, 1, 2, 4, 4};
+      return structs_col{{values, counts}};
+    }();
+    return cudf::make_lists_column(
+      2, int32s_col{0, 4, 8}.release(), structs.release(), 0, rmm::device_buffer{});
+  }();
+
+  auto const [res_keys, res_histogram] =
+    groupby_histogram(keys, values, cudf::aggregation::MERGE_HISTOGRAM);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *res_keys);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected_histogram, *res_histogram);
+}
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index 2561f3f9886..7644ac48892 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -28,6 +28,7 @@
 #include <cudf/reduction.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
+#include <cudf/sorting.hpp>
 #include <cudf/types.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
@@ -379,6 +380,212 @@ TYPED_TEST(ReductionTest, SumOfSquare)
             expected_null_value);
 }
 
+auto histogram_reduction(cudf::column_view const& input,
+                         std::unique_ptr<cudf::reduce_aggregation> const& agg)
+{
+  CUDF_EXPECTS(
+    agg->kind == cudf::aggregation::HISTOGRAM || agg->kind == cudf::aggregation::MERGE_HISTOGRAM,
+    "Aggregation must be either HISTOGRAM or MERGE_HISTOGRAM.");
+
+  auto const result_scalar = cudf::reduce(input, *agg, cudf::data_type{cudf::type_id::INT64});
+  EXPECT_EQ(result_scalar->is_valid(), true);
+
+  auto const result_list_scalar = dynamic_cast<cudf::list_scalar*>(result_scalar.get());
+  EXPECT_NE(result_list_scalar, nullptr);
+
+  auto const histogram = result_list_scalar->view();
+  EXPECT_EQ(histogram.num_children(), 2);
+  EXPECT_EQ(histogram.null_count(), 0);
+  EXPECT_EQ(histogram.child(1).null_count(), 0);
+
+  // Sort the histogram based on the first column (unique input values).
+  auto const sort_order = cudf::sorted_order(cudf::table_view{{histogram.child(0)}}, {}, {});
+  return std::move(cudf::gather(cudf::table_view{{histogram}}, *sort_order)->release().front());
+}
+
+template <typename T>
+struct ReductionHistogramTest : public cudf::test::BaseFixture {};
+
+// Avoid unsigned types, as the tests below have negative values in their input.
+using HistogramTestTypes = cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t>,
+                                              cudf::test::FloatingPointTypes,
+                                              cudf::test::FixedPointTypes,
+                                              cudf::test::ChronoTypes>;
+TYPED_TEST_SUITE(ReductionHistogramTest, HistogramTestTypes);
+
+TYPED_TEST(ReductionHistogramTest, Histogram)
+{
+  using data_col    = cudf::test::fixed_width_column_wrapper<TypeParam, int>;
+  using int64_col   = cudf::test::fixed_width_column_wrapper<int64_t>;
+  using structs_col = cudf::test::structs_column_wrapper;
+
+  auto const agg = cudf::make_histogram_aggregation<reduce_aggregation>();
+
+  // Empty input.
+  {
+    auto const input    = data_col{};
+    auto const expected = [] {
+      auto child1 = data_col{};
+      auto child2 = int64_col{};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  {
+    auto const input    = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1};
+    auto const expected = [] {
+      auto child1 = data_col{-3, -2, 0, 1, 2, 5};
+      auto child2 = int64_col{2, 1, 1, 2, 4, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test without nulls, sliced input.
+  {
+    auto const input_original = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1};
+    auto const input          = cudf::slice(input_original, {0, 7})[0];
+    auto const expected       = [] {
+      auto child1 = data_col{-3, 0, 1, 2, 5};
+      auto child2 = int64_col{1, 1, 1, 3, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test with nulls.
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+  {
+    auto const input    = data_col{{null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1},
+                                nulls_at({0, 6, 9, 12})};
+    auto const expected = [] {
+      auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)};
+      auto child2 = int64_col{4, 2, 1, 1, 2, 4, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test with nulls, sliced input.
+  {
+    auto const input_original = data_col{
+      {null, -3, 2, 1, 2, 0, null, 5, 2, null, -3, -2, null, 2, 1}, nulls_at({0, 6, 9, 12})};
+    auto const input    = cudf::slice(input_original, {0, 9})[0];
+    auto const expected = [] {
+      auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)};
+      auto child2 = int64_col{2, 1, 1, 1, 3, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+}
+
+TYPED_TEST(ReductionHistogramTest, MergeHistogram)
+{
+  using data_col    = cudf::test::fixed_width_column_wrapper<TypeParam>;
+  using int64_col   = cudf::test::fixed_width_column_wrapper<int64_t>;
+  using structs_col = cudf::test::structs_column_wrapper;
+
+  auto const agg = cudf::make_merge_histogram_aggregation<reduce_aggregation>();
+
+  // Empty input.
+  {
+    auto const input = [] {
+      auto child1 = data_col{};
+      auto child2 = int64_col{};
+      return structs_col{{child1, child2}};
+    }();
+    auto const expected = [] {
+      auto child1 = data_col{};
+      auto child2 = int64_col{};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test without nulls.
+  {
+    auto const input = [] {
+      auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1};
+      auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4};
+      return structs_col{{child1, child2}};
+    }();
+
+    auto const expected = [] {
+      auto child1 = data_col{-3, -2, 0, 1, 2, 5};
+      auto child2 = int64_col{5, 5, 4, 5, 8, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test without nulls, sliced input.
+  {
+    auto const input_original = [] {
+      auto child1 = data_col{-3, 2, 1, 2, 0, 5, 2, -3, -2, 2, 1};
+      auto child2 = int64_col{2, 1, 1, 2, 4, 1, 2, 3, 5, 3, 4};
+      return structs_col{{child1, child2}};
+    }();
+    auto const input = cudf::slice(input_original, {0, 7})[0];
+
+    auto const expected = [] {
+      auto child1 = data_col{-3, 0, 1, 2, 5};
+      auto child2 = int64_col{2, 4, 1, 5, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test with nulls.
+  using namespace cudf::test::iterators;
+  auto constexpr null{0};
+  {
+    auto const input = [] {
+      auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null},
+                             nulls_at({2, 5, 8, 11, 15})};
+      auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19};
+      return structs_col{{child1, child2}};
+    }();
+
+    auto const expected = [] {
+      auto child1 = data_col{{null, -3, -2, 0, 1, 2, 5}, null_at(0)};
+      auto child2 = int64_col{67, 5, 5, 4, 5, 8, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+
+  // Test with nulls, sliced input.
+  {
+    auto const input_original = [] {
+      auto child1 = data_col{{-3, 2, null, 1, 2, null, 0, 5, null, 2, -3, null, -2, 2, 1, null},
+                             nulls_at({2, 5, 8, 11, 15})};
+      auto child2 = int64_col{2, 1, 12, 1, 2, 11, 4, 1, 10, 2, 3, 15, 5, 3, 4, 19};
+      return structs_col{{child1, child2}};
+    }();
+    auto const input = cudf::slice(input_original, {0, 9})[0];
+
+    auto const expected = [] {
+      auto child1 = data_col{{null, -3, 0, 1, 2, 5}, null_at(0)};
+      auto child2 = int64_col{33, 2, 4, 1, 3, 1};
+      return structs_col{{child1, child2}};
+    }();
+    auto const result = histogram_reduction(input, agg);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result);
+  }
+}
+
 template <typename T>
 struct ReductionAnyAllTest : public ReductionTest<bool> {};
 using AnyAllTypes = cudf::test::Types<int32_t, float, bool>;