diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index efdb85691bd..ec93c709163 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -199,10 +199,30 @@ struct window_bounds { * column of the same type as the input. Therefore it is suggested to convert integer column types * (especially low-precision integers) to `FLOAT32` or `FLOAT64` before doing a rolling `MEAN`. * + * Note: `preceding_window` and `following_window` could well have negative values. This yields + * windows where the current row might not be included at all. For instance, consider a window + * defined as (preceding=3, following=-1). This produces a window from 2 (i.e. 3-1) rows preceding + * the current row, and 1 row *preceding* the current row. For the example above, the window for + * row#3 is: + * + * [ 10, 20, 10, 50, 60, 20, 30, 80, 40 ] + * <--window--> ^ + * | + * current_row + * + * Similarly, `preceding` could have a negative value, indicating that the window begins at a + * position after the current row. It differs slightly from the semantics for `following`, because + * `preceding` includes the current row. Therefore: + * 1. preceding=1 => Window starts at the current row. + * 2. preceding=0 => Window starts at 1 past the current row. + * 3. preceding=-1 => Window starts at 2 past the current row. Etc. + * * @param[in] group_keys The (pre-sorted) grouping columns * @param[in] input The input column (to be aggregated) - * @param[in] preceding_window The static rolling window size in the backward direction - * @param[in] following_window The static rolling window size in the forward direction + * @param[in] preceding_window The static rolling window size in the backward direction (for + * positive values), or forward direction (for negative values) + * @param[in] following_window The static rolling window size in the forward direction (for positive + * values), or backward direction (for negative values) * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 3b6d53f43c4..0648ef3d30f 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -70,7 +70,22 @@ namespace cudf { namespace detail { -namespace { // anonymous +/// Helper function to materialize preceding/following offsets. +template +std::unique_ptr expand_to_column(Calculator const& calc, + size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + auto window_column = cudf::make_numeric_column( + cudf::data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto begin = cudf::detail::make_counting_transform_iterator(0, calc); + + thrust::copy_n( + rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data()); + + return window_column; +} /** * @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window. @@ -91,14 +106,14 @@ struct DeviceRolling { // operations we do support template - DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { } // operations we don't support template - DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -111,7 +126,7 @@ struct DeviceRolling { mutable_column_device_view& output, size_type start_index, size_type end_index, - size_type current_index) + size_type current_index) const { using AggOp = typename corresponding_operator::type; AggOp agg_op; @@ -144,7 +159,7 @@ struct DeviceRolling { template struct DeviceRollingArgMinMaxBase { size_type min_periods; - DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} + explicit DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} static constexpr bool is_supported() { @@ -162,7 +177,7 @@ struct DeviceRollingArgMinMaxBase { */ template struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase { - DeviceRollingArgMinMaxString(size_type _min_periods) + explicit DeviceRollingArgMinMaxString(size_type _min_periods) : DeviceRollingArgMinMaxBase(_min_periods) { } @@ -461,8 +476,8 @@ struct agg_specific_empty_output { } }; -std::unique_ptr empty_output_for_rolling_aggregation(column_view const& input, - rolling_aggregation const& agg) +static std::unique_ptr empty_output_for_rolling_aggregation(column_view const& input, + rolling_aggregation const& agg) { // TODO: // Ideally, for UDF aggregations, the returned column would match @@ -1215,8 +1230,6 @@ struct dispatch_rolling { } }; -} // namespace - // Applies a user-defined rolling window function to the values in a column. template std::unique_ptr rolling_window_udf(column_view const& input, diff --git a/cpp/src/rolling/detail/rolling_fixed_window.cu b/cpp/src/rolling/detail/rolling_fixed_window.cu index fb7b1b5f590..e951db955e5 100644 --- a/cpp/src/rolling/detail/rolling_fixed_window.cu +++ b/cpp/src/rolling/detail/rolling_fixed_window.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,9 @@ #include #include +#include + #include -#include namespace cudf::detail { @@ -43,6 +44,9 @@ std::unique_ptr rolling_window(column_view const& input, CUDF_EXPECTS((default_outputs.is_empty() || default_outputs.size() == input.size()), "Defaults column must be either empty or have as many rows as the input column."); + CUDF_EXPECTS(-(preceding_window - 1) <= following_window, + "Preceding window bounds must precede the following window bounds."); + if (agg.kind == aggregation::CUDA || agg.kind == aggregation::PTX) { // TODO: In future, might need to clamp preceding/following to column boundaries. return cudf::detail::rolling_window_udf(input, @@ -58,18 +62,22 @@ std::unique_ptr rolling_window(column_view const& input, // Clamp preceding/following to column boundaries. // E.g. If preceding_window == 2, then for a column of 5 elements, preceding_window will be: // [1, 2, 2, 2, 1] - auto const preceding_window_begin = cudf::detail::make_counting_transform_iterator( - 0, - [preceding_window] __device__(size_type i) { return thrust::min(i + 1, preceding_window); }); - auto const following_window_begin = cudf::detail::make_counting_transform_iterator( - 0, [col_size = input.size(), following_window] __device__(size_type i) { - return thrust::min(col_size - i - 1, following_window); - }); + auto const preceding_calc = [preceding_window] __device__(size_type i) { + return thrust::min(i + 1, preceding_window); + }; + + auto const following_calc = [col_size = input.size(), + following_window] __device__(size_type i) { + return thrust::min(col_size - i - 1, following_window); + }; + + auto const preceding_column = expand_to_column(preceding_calc, input.size(), stream); + auto const following_column = expand_to_column(following_calc, input.size(), stream); return cudf::detail::rolling_window(input, default_outputs, - preceding_window_begin, - following_window_begin, + preceding_column->view().begin(), + following_column->view().begin(), min_periods, agg, stream, diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index ca5c04d1c4f..6e69b5157c2 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -30,7 +30,6 @@ #include #include -#include #include #include #include @@ -94,6 +93,109 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, namespace detail { +/// Preceding window calculation functor. +template +struct row_based_preceding_calc { + cudf::size_type const* _group_offsets_begin; + cudf::size_type const* _group_labels_begin; + cudf::size_type const _preceding_window; + + row_based_preceding_calc(rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& preceding_window) + : _group_offsets_begin(group_offsets.data()), + _group_labels_begin(group_labels.data()), + _preceding_window(preceding_window) + { + } + + __device__ cudf::size_type operator()(cudf::size_type const& idx) const + { + auto group_label = _group_labels_begin[idx]; + if constexpr (preceding_less_than_1) { // where 1 indicates only the current row. + auto group_end = _group_offsets_begin[group_label + 1]; + return thrust::maximum{}(_preceding_window, -(group_end - 1 - idx)); + } else { + auto group_start = _group_offsets_begin[group_label]; + return thrust::minimum{}(_preceding_window, + idx - group_start + 1); // Preceding includes current row. + } + } +}; + +/// Helper to materialize preceding-window column, corrected to respect group boundaries. +/// E.g. If preceding window == 5, then, +/// 1. For the first row in the group, the preceding is set to 1, +/// 2. For the next row in the group, preceding is set to 2, etc. +std::unique_ptr make_preceding_column( + rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& preceding_window, + cudf::size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + if (preceding_window < 1) { + auto const calc = row_based_preceding_calc(group_offsets, group_labels, preceding_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } else { + auto const calc = + row_based_preceding_calc(group_offsets, group_labels, preceding_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } +} + +/// Following window calculation functor. +template +struct row_based_following_calc { + cudf::size_type const* _group_offsets_begin; + cudf::size_type const* _group_labels_begin; + cudf::size_type const _following_window; + + row_based_following_calc(rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& following_window) + : _group_offsets_begin(group_offsets.data()), + _group_labels_begin(group_labels.data()), + _following_window(following_window) + { + } + + __device__ cudf::size_type operator()(cudf::size_type const& idx) const + { + auto group_label = _group_labels_begin[idx]; + if constexpr (following_less_than_0) { + auto group_start = _group_offsets_begin[group_label]; + return thrust::maximum{}(_following_window, -(idx - group_start) - 1); + } else { + auto group_end = + _group_offsets_begin[group_label + 1]; // Cannot fall off the end, since offsets + // is capped with `input.size()`. + return thrust::minimum{}(_following_window, (group_end - 1) - idx); + } + } +}; + +/// Helper to materialize following-window column, corrected to respect group boundaries. +/// i.e. If following window == 5, then: +/// 1. For the last row in the group, the following is set to 0. +/// 2. For the second last row in the group, following is set to 1, etc. +std::unique_ptr make_following_column( + rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& following_window, + cudf::size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + if (following_window < 0) { + auto const calc = row_based_following_calc(group_offsets, group_labels, following_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } else { + auto const calc = + row_based_following_calc(group_offsets, group_labels, following_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } +} + std::unique_ptr grouped_rolling_window(table_view const& group_keys, column_view const& input, column_view const& default_outputs, @@ -111,7 +213,7 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, CUDF_EXPECTS((group_keys.num_columns() == 0 || group_keys.num_rows() == input.size()), "Size mismatch between group_keys and input vector."); - CUDF_EXPECTS((min_periods > 0), "min_periods must be positive"); + CUDF_EXPECTS((min_periods >= 0), "min_periods must be non-negative"); CUDF_EXPECTS((default_outputs.is_empty() || default_outputs.size() == input.size()), "Defaults column must be either empty or have as many rows as the input column."); @@ -127,6 +229,9 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, auto const preceding_window = preceding_window_bounds.value(); auto const following_window = following_window_bounds.value(); + CUDF_EXPECTS(-(preceding_window - 1) <= following_window, + "Preceding window bounds must precede the following window bounds."); + if (group_keys.num_columns() == 0) { // No Groupby columns specified. Treat as one big group. return rolling_window( @@ -157,24 +262,6 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, group_offsets.element(group_offsets.size() - 1, stream) == input.size() && "Must have at least one group."); - auto preceding_calculator = [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - preceding_window] __device__(size_type idx) { - auto group_label = d_group_labels[idx]; - auto group_start = d_group_offsets[group_label]; - return thrust::minimum{}(preceding_window, - idx - group_start + 1); // Preceding includes current row. - }; - - auto following_calculator = [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - following_window] __device__(size_type idx) { - auto group_label = d_group_labels[idx]; - auto group_end = d_group_offsets[group_label + 1]; // Cannot fall off the end, since offsets - // is capped with `input.size()`. - return thrust::minimum{}(following_window, (group_end - 1) - idx); - }; - if (aggr.kind == aggregation::CUDA || aggr.kind == aggregation::PTX) { cudf::detail::preceding_window_wrapper grouped_preceding_window{ group_offsets.data(), group_labels.data(), preceding_window}; @@ -192,15 +279,18 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, stream, mr); } else { - return cudf::detail::rolling_window( - input, - default_outputs, - cudf::detail::make_counting_transform_iterator(0, preceding_calculator), - cudf::detail::make_counting_transform_iterator(0, following_calculator), - min_periods, - aggr, - stream, - mr); + auto const preceding_column = + make_preceding_column(group_offsets, group_labels, preceding_window, input.size(), stream); + auto const following_column = + make_following_column(group_offsets, group_labels, following_window, input.size(), stream); + return cudf::detail::rolling_window(input, + default_outputs, + preceding_column->view().begin(), + following_column->view().begin(), + min_periods, + aggr, + stream, + mr); } } @@ -321,22 +411,6 @@ std::tuple get_null_bounds_for_orderby_column( : std::make_tuple(num_rows - num_nulls, num_rows); } -template -std::unique_ptr expand_to_column(Calculator const& calc, - size_type const& num_rows, - rmm::cuda_stream_view stream) -{ - auto window_column = cudf::make_numeric_column( - cudf::data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream); - - auto begin = cudf::detail::make_counting_transform_iterator(0, calc); - - thrust::copy_n( - rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data()); - - return window_column; -} - /// Range window computation, with /// 1. no grouping keys specified /// 2. rows in ASCENDING order. @@ -390,7 +464,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [nulls_begin_idx = h_nulls_begin_idx, @@ -425,7 +500,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -570,7 +646,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [d_group_offsets = group_offsets.data(), @@ -616,7 +693,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -675,7 +753,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [nulls_begin_idx = h_nulls_begin_idx, @@ -710,7 +789,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -774,7 +854,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [d_group_offsets = group_offsets.data(), @@ -817,7 +898,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); if (aggr.kind == aggregation::CUDA || aggr.kind == aggregation::PTX) { CUDF_FAIL("Ranged rolling window does NOT (yet) support UDF."); diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index d699d7bea85..5c78cc4382d 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,6 @@ #include #include -#include - namespace cudf { // Applies a fixed-size rolling window function to the values in a column, with default output diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 4923ef5c903..6ac96904362 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -453,6 +453,7 @@ ConfigureTest( rolling/grouped_rolling_test.cpp rolling/lead_lag_test.cpp rolling/nth_element_test.cpp + rolling/offset_row_window_test.cpp rolling/range_comparator_test.cu rolling/range_rolling_window_test.cpp rolling/range_window_bounds_test.cpp diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index 774f2f7fc40..7dd72ace53c 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -33,9 +33,6 @@ #include #include -#include -#include - const std::string cuda_func{ R"***( template @@ -637,7 +634,7 @@ TYPED_TEST(GroupedRollingTest, ZeroWindow) key_1_vec.end()); const cudf::table_view grouping_keys{std::vector{key_0, key_1}}; - cudf::size_type preceding_window = 0; + cudf::size_type preceding_window = 1; cudf::size_type following_window = 0; std::vector expected_group_offsets{0, 4, 8, DATA_SIZE}; diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp new file mode 100644 index 00000000000..ec726878b34 --- /dev/null +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +template +using fwcw = cudf::test::fixed_width_column_wrapper; +template +using decimals_column = cudf::test::fixed_point_column_wrapper; +using ints_column = fwcw; +using bigints_column = fwcw; +using strings_column = cudf::test::strings_column_wrapper; +using lists_column = cudf::test::lists_column_wrapper; +using column_ptr = std::unique_ptr; +using cudf::test::iterators::all_nulls; +using cudf::test::iterators::no_nulls; +using cudf::test::iterators::nulls_at; + +auto constexpr null = int32_t{0}; // NULL representation for int32_t; + +struct OffsetRowWindowTest : public cudf::test::BaseFixture { + static ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; + static ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + struct rolling_runner { + cudf::window_bounds _preceding, _following; + cudf::size_type _min_periods; + bool _grouped = true; + + rolling_runner(cudf::window_bounds const& preceding, + cudf::window_bounds const& following, + cudf::size_type min_periods_ = 1) + : _preceding{preceding}, _following{following}, _min_periods{min_periods_} + { + } + + rolling_runner& min_periods(cudf::size_type min_periods_) + { + _min_periods = min_periods_; + return *this; + } + + rolling_runner& grouped(bool grouped_) + { + _grouped = grouped_; + return *this; + } + + std::unique_ptr operator()(cudf::rolling_aggregation const& agg) const + { + auto const grouping_keys = + _grouped ? std::vector{_keys} : std::vector{}; + return cudf::grouped_rolling_window( + cudf::table_view{grouping_keys}, _values, _preceding, _following, _min_periods, agg); + } + }; +}; + +ints_column const OffsetRowWindowTest::_keys{0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; +ints_column const OffsetRowWindowTest::_values{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +auto const AGG_COUNT_NON_NULL = + cudf::make_count_aggregation(cudf::null_policy::EXCLUDE); +auto const AGG_COUNT_ALL = + cudf::make_count_aggregation(cudf::null_policy::INCLUDE); +auto const AGG_MIN = cudf::make_min_aggregation(); +auto const AGG_MAX = cudf::make_max_aggregation(); +auto const AGG_SUM = cudf::make_sum_aggregation(); +auto const AGG_COLLECT_LIST = cudf::make_collect_list_aggregation(); + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_3_to_Minus_1) +{ + auto const preceding = cudf::window_bounds::get(3); + auto const following = cudf::window_bounds::get(-1); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MIN), ints_column{{null, 0, 0, 1, 2, 3, null, 6, 6, 7}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MAX), ints_column{{null, 0, 1, 2, 3, 4, null, 6, 7, 8}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), + bigints_column{{null, 0, 1, 3, 5, 7, null, 6, 13, 15}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {}, {6}, {6, 7}, {7, 8}}, + nulls_at({0, 6})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {}, {6}, {6, 7}, {7, 8}}, no_nulls()}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_3_to_Minus_1) +{ + auto const preceding = cudf::window_bounds::get(3); + auto const following = cudf::window_bounds::get(-1); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MIN), + ints_column{{null, 0, 0, 1, 2, 3, 4, 5, 6, 7}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MAX), + ints_column{{null, 0, 1, 2, 3, 4, 5, 6, 7, 8}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), bigints_column{{null, 0, 1, 3, 5, 7, 9, 11, 13, 15}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}}, + nulls_at({0})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}}, + no_nulls()}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2) +{ + auto const preceding = cudf::window_bounds::get(0); + auto const following = cudf::window_bounds::get(2); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 1, null, 2, 2, 1, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 1, null, 2, 2, 1, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MIN), ints_column{{1, 2, 3, 4, 5, null, 7, 8, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MAX), ints_column{{2, 3, 4, 5, 5, null, 8, 9, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), + bigints_column{{3, 5, 7, 9, 5, null, 15, 17, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, + nulls_at({5, 9})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 1, 0, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 1, 0, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) +{ + auto const preceding = cudf::window_bounds::get(0); + auto const following = cudf::window_bounds::get(2); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MIN), + ints_column{{1, 2, 3, 4, 5, 6, 7, 8, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MAX), + ints_column{{2, 3, 4, 5, 6, 7, 8, 9, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), bigints_column{{3, 5, 7, 9, 11, 13, 15, 17, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, + nulls_at({9})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, + no_nulls}); +} + +// To test that preceding bounds are clamped correctly at group boundaries. +TEST_F(OffsetRowWindowTest, TestNegativeBoundsClamp) +{ + auto const grp_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto const& i) { + return i / 10; // 0-9 in the first group, 10-19 in the second, etc. + }); + auto const agg_iter = thrust::make_constant_iterator(1); + + auto const grp = ints_column(grp_iter, grp_iter + 30); + auto const agg = ints_column(agg_iter, agg_iter + 30); + + auto const min_periods = 0; + auto const rolling_sum = [&](auto const preceding, auto const following) { + return cudf::grouped_rolling_window( + cudf::table_view{{grp}}, agg, preceding, following, min_periods, *AGG_SUM); + }; + + // Testing negative preceding. + for (auto const preceding : {0, -1, -2, -5, -10, -20, -50}) { + auto const results = rolling_sum(preceding, 100); + auto const expected_fun = [&](auto const& i) { + assert(preceding < 1); + auto const index_in_group = i % 10; + auto const start = std::min(-(preceding - 1) + index_in_group, 10); + return int64_t{10 - start}; + }; + auto const expected_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), expected_fun); + auto const expected = bigints_column(expected_iter, expected_iter + 30, no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + + // Testing negative following. + for (auto const following : {-1, -2, -5, -10, -20, -50}) { + auto const results = rolling_sum(100, following); + auto const expected_fun = [&](auto const& i) { + assert(following < 0); + auto const index_in_group = i % 10; + auto const end = std::max(index_in_group + following, -1); + return int64_t{end + 1}; + }; + auto const expected_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), expected_fun); + auto const expected = bigints_column(expected_iter, expected_iter + 30, no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } +} + +TEST_F(OffsetRowWindowTest, CheckGroupBoundaries) +{ + auto grp_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto const& i) { + if (i < 10) return 1; + if (i < 20) return 2; + return 3; + }); + auto const grp = ints_column(grp_iter, grp_iter + 30); + auto const agg = ints_column(grp_iter, grp_iter + 30); + { + auto const results = + cudf::grouped_rolling_window(cudf::table_view{{grp}}, + agg, + -80, + 100, + 1, + *cudf::make_max_aggregation()); + auto const null_iter = thrust::make_constant_iterator(null); + auto const expected = ints_column(null_iter, null_iter + 30, all_nulls()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } + { + auto const results = + cudf::grouped_rolling_window(cudf::table_view{{grp}}, + agg, + -1, + 4, + 1, + *cudf::make_min_aggregation()); + auto const expected = + ints_column{{1, 1, 1, 1, 1, 1, 1, 1, null, null, 2, 2, 2, 2, 2, + 2, 2, 2, null, null, 3, 3, 3, 3, 3, 3, 3, 3, null, null}, + nulls_at({8, 9, 18, 19, 28, 29})}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } +} diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index e410e2488b3..d0181974479 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -148,20 +148,6 @@ TEST_F(RollingStringTest, MinPeriods) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_count_all, got_count_all->view()); } -TEST_F(RollingStringTest, ZeroWindowSize) -{ - cudf::test::strings_column_wrapper input( - {"This", "is", "rolling", "test", "being", "operated", "on", "string", "column"}, - {1, 0, 0, 1, 0, 1, 1, 1, 0}); - cudf::test::fixed_width_column_wrapper expected_count( - {0, 0, 0, 0, 0, 0, 0, 0, 0}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - - auto got_count = cudf::rolling_window( - input, 0, 0, 0, *cudf::make_count_aggregation()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_count, got_count->view()); -} - // ========================================================================================= class RollingStructTest : public cudf::test::BaseFixture {}; @@ -970,6 +956,7 @@ TEST_F(RollingtVarStdTestUntyped, SimpleStaticVarianceStdInfNaN) #undef XXX } +/* // negative sizes TYPED_TEST(RollingTest, NegativeWindowSizes) { @@ -980,10 +967,12 @@ TYPED_TEST(RollingTest, NegativeWindowSizes) std::vector window{3}; std::vector negative_window{-2}; + this->run_test_col_agg(input, negative_window, window, 1); this->run_test_col_agg(input, window, negative_window, 1); this->run_test_col_agg(input, negative_window, negative_window, 1); } + */ // simple example from Pandas docs: TYPED_TEST(RollingTest, SimpleDynamic) @@ -1033,6 +1022,7 @@ TYPED_TEST(RollingTest, AllInvalid) } // window = following_window = 0 +// Note: Preceding includes current row, so its value is set to 1. TYPED_TEST(RollingTest, ZeroWindow) { cudf::size_type num_rows = 1000; @@ -1042,10 +1032,11 @@ TYPED_TEST(RollingTest, ZeroWindow) cudf::test::fixed_width_column_wrapper input( col_data.begin(), col_data.end(), col_mask.begin()); - std::vector window({0}); + std::vector preceding({0}); + std::vector following({1}); cudf::size_type periods = num_rows; - this->run_test_col_agg(input, window, window, periods); + this->run_test_col_agg(input, preceding, following, periods); } // min_periods = 0