From ec744de69d88ada46d744c5121e137c817cb2709 Mon Sep 17 00:00:00 2001 From: MithunR Date: Thu, 21 Sep 2023 11:45:13 -0700 Subject: [PATCH 1/3] Support negative preceding/following for ROW window functions (#14093) This commit adds support for "offset" ROW windows, where the preceding and following window bounds are allowed to have negative values. This allows window definitions to exclude the current row entirely. Prior to this change, ROW-based windows *had* to include the current row, causing `preceding` and `following` to support only non-negative values. Additionally, the inclusion of the current row would count towards the `min_periods` check. The following is an example of the new "negative" semantics. Consider the input: ```c++ auto const row = ints_column{1, 2, 3, 4}; ``` If the window bounds are specified as (preceding=3, following=-1), then the window for the third row (`3`) is `{1, 2}`. `following=-1` indicates that the window ends one row *before* the current row. `preceding` follows the existing convention of counting the current row, which makes its small and negative values slightly more involved: 1. `preceding=2` indicates that the window starts *one* row before the current row. 2. `preceding=1` indicates that the window starts at the current row. 3. `preceding=0` indicates that the window starts one row past (i.e. after) the current row. 4. `preceding=-1` indicates that the window starts two rows after the current row. Et cetera. `min_periods` checks continue to be honoured as before, but the requirement for positive `min_periods` is dropped. `min_periods` need only be non-negative. 
Authors: - MithunR (https://github.com/mythrocks) Approvers: - Divye Gala (https://github.com/divyegala) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cudf/pull/14093 --- cpp/include/cudf/rolling.hpp | 24 +- cpp/src/rolling/detail/rolling.cuh | 33 +- .../rolling/detail/rolling_fixed_window.cu | 30 +- cpp/src/rolling/grouped_rolling.cu | 188 +++++++--- cpp/src/rolling/rolling.cu | 4 +- cpp/tests/CMakeLists.txt | 1 + cpp/tests/rolling/grouped_rolling_test.cpp | 5 +- cpp/tests/rolling/offset_row_window_test.cpp | 343 ++++++++++++++++++ cpp/tests/rolling/rolling_test.cpp | 23 +- 9 files changed, 552 insertions(+), 99 deletions(-) create mode 100644 cpp/tests/rolling/offset_row_window_test.cpp diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index efdb85691bd..ec93c709163 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -199,10 +199,30 @@ struct window_bounds { * column of the same type as the input. Therefore it is suggested to convert integer column types * (especially low-precision integers) to `FLOAT32` or `FLOAT64` before doing a rolling `MEAN`. * + * Note: `preceding_window` and `following_window` could well have negative values. This yields + * windows where the current row might not be included at all. For instance, consider a window + * defined as (preceding=3, following=-1). This produces a window from 2 (i.e. 3-1) rows preceding + * the current row, to 1 row *preceding* the current row. For the example above, the window for + * row#3 is: + * + * [ 10, 20, 10, 50, 60, 20, 30, 80, 40 ] + * <--window--> ^ + * | + * current_row + * + * Similarly, `preceding` could have a negative value, indicating that the window begins at a + * position after the current row. It differs slightly from the semantics for `following`, because + * `preceding` includes the current row. Therefore: + * 1. preceding=1 => Window starts at the current row. + * 2. 
preceding=0 => Window starts at 1 past the current row. + * 3. preceding=-1 => Window starts at 2 past the current row. Etc. + * * @param[in] group_keys The (pre-sorted) grouping columns * @param[in] input The input column (to be aggregated) - * @param[in] preceding_window The static rolling window size in the backward direction - * @param[in] following_window The static rolling window size in the forward direction + * @param[in] preceding_window The static rolling window size in the backward direction (for + * positive values), or forward direction (for negative values) + * @param[in] following_window The static rolling window size in the forward direction (for positive + * values), or backward direction (for negative values) * @param[in] min_periods Minimum number of observations in window required to have a value, * otherwise element `i` is null. * @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.) diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index 3b6d53f43c4..0648ef3d30f 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -70,7 +70,22 @@ namespace cudf { namespace detail { -namespace { // anonymous +/// Helper function to materialize preceding/following offsets. +template +std::unique_ptr expand_to_column(Calculator const& calc, + size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + auto window_column = cudf::make_numeric_column( + cudf::data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto begin = cudf::detail::make_counting_transform_iterator(0, calc); + + thrust::copy_n( + rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data()); + + return window_column; +} /** * @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window. 
@@ -91,14 +106,14 @@ struct DeviceRolling { // operations we do support template - DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { } // operations we don't support template - DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) + explicit DeviceRolling(size_type _min_periods, std::enable_if_t()>* = nullptr) : min_periods(_min_periods) { CUDF_FAIL("Invalid aggregation/type pair"); @@ -111,7 +126,7 @@ struct DeviceRolling { mutable_column_device_view& output, size_type start_index, size_type end_index, - size_type current_index) + size_type current_index) const { using AggOp = typename corresponding_operator::type; AggOp agg_op; @@ -144,7 +159,7 @@ struct DeviceRolling { template struct DeviceRollingArgMinMaxBase { size_type min_periods; - DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} + explicit DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {} static constexpr bool is_supported() { @@ -162,7 +177,7 @@ struct DeviceRollingArgMinMaxBase { */ template struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase { - DeviceRollingArgMinMaxString(size_type _min_periods) + explicit DeviceRollingArgMinMaxString(size_type _min_periods) : DeviceRollingArgMinMaxBase(_min_periods) { } @@ -461,8 +476,8 @@ struct agg_specific_empty_output { } }; -std::unique_ptr empty_output_for_rolling_aggregation(column_view const& input, - rolling_aggregation const& agg) +static std::unique_ptr empty_output_for_rolling_aggregation(column_view const& input, + rolling_aggregation const& agg) { // TODO: // Ideally, for UDF aggregations, the returned column would match @@ -1215,8 +1230,6 @@ struct dispatch_rolling { } }; -} // namespace - // Applies a user-defined rolling window function to the values in a column. 
template std::unique_ptr rolling_window_udf(column_view const& input, diff --git a/cpp/src/rolling/detail/rolling_fixed_window.cu b/cpp/src/rolling/detail/rolling_fixed_window.cu index fb7b1b5f590..e951db955e5 100644 --- a/cpp/src/rolling/detail/rolling_fixed_window.cu +++ b/cpp/src/rolling/detail/rolling_fixed_window.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,9 @@ #include #include +#include + #include -#include namespace cudf::detail { @@ -43,6 +44,9 @@ std::unique_ptr rolling_window(column_view const& input, CUDF_EXPECTS((default_outputs.is_empty() || default_outputs.size() == input.size()), "Defaults column must be either empty or have as many rows as the input column."); + CUDF_EXPECTS(-(preceding_window - 1) <= following_window, + "Preceding window bounds must precede the following window bounds."); + if (agg.kind == aggregation::CUDA || agg.kind == aggregation::PTX) { // TODO: In future, might need to clamp preceding/following to column boundaries. return cudf::detail::rolling_window_udf(input, @@ -58,18 +62,22 @@ std::unique_ptr rolling_window(column_view const& input, // Clamp preceding/following to column boundaries. // E.g. 
If preceding_window == 2, then for a column of 5 elements, preceding_window will be: // [1, 2, 2, 2, 1] - auto const preceding_window_begin = cudf::detail::make_counting_transform_iterator( - 0, - [preceding_window] __device__(size_type i) { return thrust::min(i + 1, preceding_window); }); - auto const following_window_begin = cudf::detail::make_counting_transform_iterator( - 0, [col_size = input.size(), following_window] __device__(size_type i) { - return thrust::min(col_size - i - 1, following_window); - }); + auto const preceding_calc = [preceding_window] __device__(size_type i) { + return thrust::min(i + 1, preceding_window); + }; + + auto const following_calc = [col_size = input.size(), + following_window] __device__(size_type i) { + return thrust::min(col_size - i - 1, following_window); + }; + + auto const preceding_column = expand_to_column(preceding_calc, input.size(), stream); + auto const following_column = expand_to_column(following_calc, input.size(), stream); return cudf::detail::rolling_window(input, default_outputs, - preceding_window_begin, - following_window_begin, + preceding_column->view().begin(), + following_column->view().begin(), min_periods, agg, stream, diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index ca5c04d1c4f..6e69b5157c2 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -30,7 +30,6 @@ #include #include -#include #include #include #include @@ -94,6 +93,109 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, namespace detail { +/// Preceding window calculation functor. 
+template +struct row_based_preceding_calc { + cudf::size_type const* _group_offsets_begin; + cudf::size_type const* _group_labels_begin; + cudf::size_type const _preceding_window; + + row_based_preceding_calc(rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& preceding_window) + : _group_offsets_begin(group_offsets.data()), + _group_labels_begin(group_labels.data()), + _preceding_window(preceding_window) + { + } + + __device__ cudf::size_type operator()(cudf::size_type const& idx) const + { + auto group_label = _group_labels_begin[idx]; + if constexpr (preceding_less_than_1) { // where 1 indicates only the current row. + auto group_end = _group_offsets_begin[group_label + 1]; + return thrust::maximum{}(_preceding_window, -(group_end - 1 - idx)); + } else { + auto group_start = _group_offsets_begin[group_label]; + return thrust::minimum{}(_preceding_window, + idx - group_start + 1); // Preceding includes current row. + } + } +}; + +/// Helper to materialize preceding-window column, corrected to respect group boundaries. +/// E.g. If preceding window == 5, then, +/// 1. For the first row in the group, the preceding is set to 1, +/// 2. For the next row in the group, preceding is set to 2, etc. +std::unique_ptr make_preceding_column( + rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& preceding_window, + cudf::size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + if (preceding_window < 1) { + auto const calc = row_based_preceding_calc(group_offsets, group_labels, preceding_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } else { + auto const calc = + row_based_preceding_calc(group_offsets, group_labels, preceding_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } +} + +/// Following window calculation functor. 
+template +struct row_based_following_calc { + cudf::size_type const* _group_offsets_begin; + cudf::size_type const* _group_labels_begin; + cudf::size_type const _following_window; + + row_based_following_calc(rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& following_window) + : _group_offsets_begin(group_offsets.data()), + _group_labels_begin(group_labels.data()), + _following_window(following_window) + { + } + + __device__ cudf::size_type operator()(cudf::size_type const& idx) const + { + auto group_label = _group_labels_begin[idx]; + if constexpr (following_less_than_0) { + auto group_start = _group_offsets_begin[group_label]; + return thrust::maximum{}(_following_window, -(idx - group_start) - 1); + } else { + auto group_end = + _group_offsets_begin[group_label + 1]; // Cannot fall off the end, since offsets + // is capped with `input.size()`. + return thrust::minimum{}(_following_window, (group_end - 1) - idx); + } + } +}; + +/// Helper to materialize following-window column, corrected to respect group boundaries. +/// i.e. If following window == 5, then: +/// 1. For the last row in the group, the following is set to 0. +/// 2. For the second last row in the group, following is set to 1, etc. 
+std::unique_ptr make_following_column( + rmm::device_uvector const& group_offsets, + rmm::device_uvector const& group_labels, + cudf::size_type const& following_window, + cudf::size_type const& num_rows, + rmm::cuda_stream_view stream) +{ + if (following_window < 0) { + auto const calc = row_based_following_calc(group_offsets, group_labels, following_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } else { + auto const calc = + row_based_following_calc(group_offsets, group_labels, following_window); + return cudf::detail::expand_to_column(calc, num_rows, stream); + } +} + std::unique_ptr grouped_rolling_window(table_view const& group_keys, column_view const& input, column_view const& default_outputs, @@ -111,7 +213,7 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, CUDF_EXPECTS((group_keys.num_columns() == 0 || group_keys.num_rows() == input.size()), "Size mismatch between group_keys and input vector."); - CUDF_EXPECTS((min_periods > 0), "min_periods must be positive"); + CUDF_EXPECTS((min_periods >= 0), "min_periods must be non-negative"); CUDF_EXPECTS((default_outputs.is_empty() || default_outputs.size() == input.size()), "Defaults column must be either empty or have as many rows as the input column."); @@ -127,6 +229,9 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, auto const preceding_window = preceding_window_bounds.value(); auto const following_window = following_window_bounds.value(); + CUDF_EXPECTS(-(preceding_window - 1) <= following_window, + "Preceding window bounds must precede the following window bounds."); + if (group_keys.num_columns() == 0) { // No Groupby columns specified. Treat as one big group. 
return rolling_window( @@ -157,24 +262,6 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, group_offsets.element(group_offsets.size() - 1, stream) == input.size() && "Must have at least one group."); - auto preceding_calculator = [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - preceding_window] __device__(size_type idx) { - auto group_label = d_group_labels[idx]; - auto group_start = d_group_offsets[group_label]; - return thrust::minimum{}(preceding_window, - idx - group_start + 1); // Preceding includes current row. - }; - - auto following_calculator = [d_group_offsets = group_offsets.data(), - d_group_labels = group_labels.data(), - following_window] __device__(size_type idx) { - auto group_label = d_group_labels[idx]; - auto group_end = d_group_offsets[group_label + 1]; // Cannot fall off the end, since offsets - // is capped with `input.size()`. - return thrust::minimum{}(following_window, (group_end - 1) - idx); - }; - if (aggr.kind == aggregation::CUDA || aggr.kind == aggregation::PTX) { cudf::detail::preceding_window_wrapper grouped_preceding_window{ group_offsets.data(), group_labels.data(), preceding_window}; @@ -192,15 +279,18 @@ std::unique_ptr grouped_rolling_window(table_view const& group_keys, stream, mr); } else { - return cudf::detail::rolling_window( - input, - default_outputs, - cudf::detail::make_counting_transform_iterator(0, preceding_calculator), - cudf::detail::make_counting_transform_iterator(0, following_calculator), - min_periods, - aggr, - stream, - mr); + auto const preceding_column = + make_preceding_column(group_offsets, group_labels, preceding_window, input.size(), stream); + auto const following_column = + make_following_column(group_offsets, group_labels, following_window, input.size(), stream); + return cudf::detail::rolling_window(input, + default_outputs, + preceding_column->view().begin(), + following_column->view().begin(), + min_periods, + aggr, + stream, + mr); } } @@ 
-321,22 +411,6 @@ std::tuple get_null_bounds_for_orderby_column( : std::make_tuple(num_rows - num_nulls, num_rows); } -template -std::unique_ptr expand_to_column(Calculator const& calc, - size_type const& num_rows, - rmm::cuda_stream_view stream) -{ - auto window_column = cudf::make_numeric_column( - cudf::data_type{type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream); - - auto begin = cudf::detail::make_counting_transform_iterator(0, calc); - - thrust::copy_n( - rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data()); - - return window_column; -} - /// Range window computation, with /// 1. no grouping keys specified /// 2. rows in ASCENDING order. @@ -390,7 +464,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [nulls_begin_idx = h_nulls_begin_idx, @@ -425,7 +500,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -570,7 +646,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. 
}; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [d_group_offsets = group_offsets.data(), @@ -616,7 +693,8 @@ std::unique_ptr range_window_ASC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -675,7 +753,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. }; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [nulls_begin_idx = h_nulls_begin_idx, @@ -710,7 +789,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); return cudf::detail::rolling_window( input, preceding_column->view(), following_column->view(), min_periods, aggr, stream, mr); @@ -774,7 +854,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; // Add 1, for `preceding` to account for current row. 
}; - auto const preceding_column = expand_to_column(preceding_calculator, input.size(), stream); + auto const preceding_column = + cudf::detail::expand_to_column(preceding_calculator, input.size(), stream); auto const following_calculator = [d_group_offsets = group_offsets.data(), @@ -817,7 +898,8 @@ std::unique_ptr range_window_DESC(column_view const& input, 1; }; - auto const following_column = expand_to_column(following_calculator, input.size(), stream); + auto const following_column = + cudf::detail::expand_to_column(following_calculator, input.size(), stream); if (aggr.kind == aggregation::CUDA || aggr.kind == aggregation::PTX) { CUDF_FAIL("Ranged rolling window does NOT (yet) support UDF."); diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index d699d7bea85..5c78cc4382d 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,8 +20,6 @@ #include #include -#include - namespace cudf { // Applies a fixed-size rolling window function to the values in a column, with default output diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6414962903e..d1e50442058 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -453,6 +453,7 @@ ConfigureTest( rolling/grouped_rolling_test.cpp rolling/lead_lag_test.cpp rolling/nth_element_test.cpp + rolling/offset_row_window_test.cpp rolling/range_comparator_test.cu rolling/range_rolling_window_test.cpp rolling/range_window_bounds_test.cpp diff --git a/cpp/tests/rolling/grouped_rolling_test.cpp b/cpp/tests/rolling/grouped_rolling_test.cpp index 774f2f7fc40..7dd72ace53c 100644 --- a/cpp/tests/rolling/grouped_rolling_test.cpp +++ b/cpp/tests/rolling/grouped_rolling_test.cpp @@ -33,9 +33,6 @@ #include #include -#include -#include - const std::string cuda_func{ R"***( template @@ -637,7 +634,7 @@ TYPED_TEST(GroupedRollingTest, ZeroWindow) key_1_vec.end()); const cudf::table_view grouping_keys{std::vector{key_0, key_1}}; - cudf::size_type preceding_window = 0; + cudf::size_type preceding_window = 1; cudf::size_type following_window = 0; std::vector expected_group_offsets{0, 4, 8, DATA_SIZE}; diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp new file mode 100644 index 00000000000..ec726878b34 --- /dev/null +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +template +using fwcw = cudf::test::fixed_width_column_wrapper; +template +using decimals_column = cudf::test::fixed_point_column_wrapper; +using ints_column = fwcw; +using bigints_column = fwcw; +using strings_column = cudf::test::strings_column_wrapper; +using lists_column = cudf::test::lists_column_wrapper; +using column_ptr = std::unique_ptr; +using cudf::test::iterators::all_nulls; +using cudf::test::iterators::no_nulls; +using cudf::test::iterators::nulls_at; + +auto constexpr null = int32_t{0}; // NULL representation for int32_t; + +struct OffsetRowWindowTest : public cudf::test::BaseFixture { + static ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; + static ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + struct rolling_runner { + cudf::window_bounds _preceding, _following; + cudf::size_type _min_periods; + bool _grouped = true; + + rolling_runner(cudf::window_bounds const& preceding, + cudf::window_bounds const& following, + cudf::size_type min_periods_ = 1) + : _preceding{preceding}, _following{following}, _min_periods{min_periods_} + { + } + + rolling_runner& min_periods(cudf::size_type min_periods_) + { + _min_periods = min_periods_; + return *this; + } + + rolling_runner& grouped(bool grouped_) + { + _grouped = grouped_; + return *this; + } + + std::unique_ptr operator()(cudf::rolling_aggregation const& agg) const + { + auto const grouping_keys = + _grouped ? 
std::vector{_keys} : std::vector{}; + return cudf::grouped_rolling_window( + cudf::table_view{grouping_keys}, _values, _preceding, _following, _min_periods, agg); + } + }; +}; + +ints_column const OffsetRowWindowTest::_keys{0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; +ints_column const OffsetRowWindowTest::_values{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + +auto const AGG_COUNT_NON_NULL = + cudf::make_count_aggregation(cudf::null_policy::EXCLUDE); +auto const AGG_COUNT_ALL = + cudf::make_count_aggregation(cudf::null_policy::INCLUDE); +auto const AGG_MIN = cudf::make_min_aggregation(); +auto const AGG_MAX = cudf::make_max_aggregation(); +auto const AGG_SUM = cudf::make_sum_aggregation(); +auto const AGG_COLLECT_LIST = cudf::make_collect_list_aggregation(); + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_3_to_Minus_1) +{ + auto const preceding = cudf::window_bounds::get(3); + auto const following = cudf::window_bounds::get(-1); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MIN), ints_column{{null, 0, 0, 1, 2, 3, null, 6, 6, 7}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MAX), ints_column{{null, 0, 1, 2, 3, 4, null, 6, 7, 8}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), + bigints_column{{null, 0, 1, 3, 5, 7, null, 6, 13, 15}, nulls_at({0, 6})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {}, {6}, {6, 7}, {7, 8}}, + nulls_at({0, 6})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 
2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 0, 1, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {}, {6}, {6, 7}, {7, 8}}, no_nulls()}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_3_to_Minus_1) +{ + auto const preceding = cudf::window_bounds::get(3); + auto const following = cudf::window_bounds::get(-1); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MIN), + ints_column{{null, 0, 0, 1, 2, 3, 4, 5, 6, 7}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MAX), + ints_column{{null, 0, 1, 2, 3, 4, 5, 6, 7, 8}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), bigints_column{{null, 0, 1, 3, 5, 7, 9, 11, 13, 15}, nulls_at({0})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}}, + nulls_at({0})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{}, {0}, {0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}}, + no_nulls()}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2) +{ + auto const preceding = cudf::window_bounds::get(0); 
+ auto const following = cudf::window_bounds::get(2); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(true); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 1, null, 2, 2, 1, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 1, null, 2, 2, 1, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MIN), ints_column{{1, 2, 3, 4, 5, null, 7, 8, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_MAX), ints_column{{2, 3, 4, 5, 5, null, 8, 9, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), + bigints_column{{3, 5, 7, 9, 5, null, 15, 17, 9, null}, nulls_at({5, 9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, + nulls_at({5, 9})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 1, 0, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 1, 0, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls}); +} + +TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) +{ + auto const preceding = cudf::window_bounds::get(0); + auto const following = cudf::window_bounds::get(2); + auto run_rolling = rolling_runner{preceding, following}.min_periods(1).grouped(false); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, null}, nulls_at({9})}); + + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MIN), + ints_column{{1, 2, 3, 4, 5, 6, 7, 8, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_MAX), + ints_column{{2, 3, 4, 5, 6, 7, 8, 9, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_SUM), bigints_column{{3, 5, 7, 9, 11, 13, 15, 17, 9, null}, nulls_at({9})}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, + nulls_at({9})}); + + run_rolling.min_periods(0); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_NON_NULL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*run_rolling(*AGG_COUNT_ALL), + ints_column{{2, 2, 2, 2, 2, 2, 2, 2, 1, 0}, no_nulls()}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *run_rolling(*AGG_COLLECT_LIST), + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, + no_nulls}); +} + +// To test that preceding bounds are clamped correctly at group boundaries. +TEST_F(OffsetRowWindowTest, TestNegativeBoundsClamp) +{ + auto const grp_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto const& i) { + return i / 10; // 0-9 in the first group, 10-19 in the second, etc. + }); + auto const agg_iter = thrust::make_constant_iterator(1); + + auto const grp = ints_column(grp_iter, grp_iter + 30); + auto const agg = ints_column(agg_iter, agg_iter + 30); + + auto const min_periods = 0; + auto const rolling_sum = [&](auto const preceding, auto const following) { + return cudf::grouped_rolling_window( + cudf::table_view{{grp}}, agg, preceding, following, min_periods, *AGG_SUM); + }; + + // Testing negative preceding. 
+ for (auto const preceding : {0, -1, -2, -5, -10, -20, -50}) { + auto const results = rolling_sum(preceding, 100); + auto const expected_fun = [&](auto const& i) { + assert(preceding < 1); + auto const index_in_group = i % 10; + auto const start = std::min(-(preceding - 1) + index_in_group, 10); + return int64_t{10 - start}; + }; + auto const expected_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), expected_fun); + auto const expected = bigints_column(expected_iter, expected_iter + 30, no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } + + // Testing negative following. + for (auto const following : {-1, -2, -5, -10, -20, -50}) { + auto const results = rolling_sum(100, following); + auto const expected_fun = [&](auto const& i) { + assert(following < 0); + auto const index_in_group = i % 10; + auto const end = std::max(index_in_group + following, -1); + return int64_t{end + 1}; + }; + auto const expected_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), expected_fun); + auto const expected = bigints_column(expected_iter, expected_iter + 30, no_nulls()); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + } +} + +TEST_F(OffsetRowWindowTest, CheckGroupBoundaries) +{ + auto grp_iter = + thrust::make_transform_iterator(thrust::make_counting_iterator(0), [](auto const& i) { + if (i < 10) return 1; + if (i < 20) return 2; + return 3; + }); + auto const grp = ints_column(grp_iter, grp_iter + 30); + auto const agg = ints_column(grp_iter, grp_iter + 30); + { + auto const results = + cudf::grouped_rolling_window(cudf::table_view{{grp}}, + agg, + -80, + 100, + 1, + *cudf::make_max_aggregation()); + auto const null_iter = thrust::make_constant_iterator(null); + auto const expected = ints_column(null_iter, null_iter + 30, all_nulls()); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } + { + auto const results = + cudf::grouped_rolling_window(cudf::table_view{{grp}}, + agg, + -1, + 4, 
+ 1, + *cudf::make_min_aggregation()); + auto const expected = + ints_column{{1, 1, 1, 1, 1, 1, 1, 1, null, null, 2, 2, 2, 2, 2, + 2, 2, 2, null, null, 3, 3, 3, 3, 3, 3, 3, 3, null, null}, + nulls_at({8, 9, 18, 19, 28, 29})}; + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + } +} diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index e410e2488b3..d0181974479 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -148,20 +148,6 @@ TEST_F(RollingStringTest, MinPeriods) CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_count_all, got_count_all->view()); } -TEST_F(RollingStringTest, ZeroWindowSize) -{ - cudf::test::strings_column_wrapper input( - {"This", "is", "rolling", "test", "being", "operated", "on", "string", "column"}, - {1, 0, 0, 1, 0, 1, 1, 1, 0}); - cudf::test::fixed_width_column_wrapper expected_count( - {0, 0, 0, 0, 0, 0, 0, 0, 0}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - - auto got_count = cudf::rolling_window( - input, 0, 0, 0, *cudf::make_count_aggregation()); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_count, got_count->view()); -} - // ========================================================================================= class RollingStructTest : public cudf::test::BaseFixture {}; @@ -970,6 +956,7 @@ TEST_F(RollingtVarStdTestUntyped, SimpleStaticVarianceStdInfNaN) #undef XXX } +/* // negative sizes TYPED_TEST(RollingTest, NegativeWindowSizes) { @@ -980,10 +967,12 @@ TYPED_TEST(RollingTest, NegativeWindowSizes) std::vector window{3}; std::vector negative_window{-2}; + this->run_test_col_agg(input, negative_window, window, 1); this->run_test_col_agg(input, window, negative_window, 1); this->run_test_col_agg(input, negative_window, negative_window, 1); } + */ // simple example from Pandas docs: TYPED_TEST(RollingTest, SimpleDynamic) @@ -1033,6 +1022,7 @@ TYPED_TEST(RollingTest, AllInvalid) } // window = following_window = 0 +// Note: Preceding includes current row, so its value is 
set to 1. TYPED_TEST(RollingTest, ZeroWindow) { cudf::size_type num_rows = 1000; @@ -1042,10 +1032,11 @@ TYPED_TEST(RollingTest, ZeroWindow) cudf::test::fixed_width_column_wrapper input( col_data.begin(), col_data.end(), col_mask.begin()); - std::vector window({0}); + std::vector preceding({0}); + std::vector following({1}); cudf::size_type periods = num_rows; - this->run_test_col_agg(input, window, window, periods); + this->run_test_col_agg(input, preceding, following, periods); } // min_periods = 0 From dcac6cc6a719e2caf1c461be32acd2f7e78308e2 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Thu, 21 Sep 2023 14:04:09 -0500 Subject: [PATCH 2/3] Update image names (#14145) PR updates `rapidsai/ci` references to `rapidsai/ci-conda` Authors: - Jake Awe (https://github.com/AyodeAwe) Approvers: - AJ Schmidt (https://github.com/ajschmidt8) URL: https://github.com/rapidsai/cudf/pull/14145 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 6 +++--- .github/workflows/test.yaml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 91ec0904103..0e120d34bb1 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b47a40b13d2..054ea7968c8 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -73,7 +73,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_java.sh" conda-notebook-tests: needs: conda-python-build 
@@ -83,7 +83,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build @@ -93,7 +93,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build-cudf: needs: checks diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6bd2787d6dc..030f2e41db4 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -32,7 +32,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_cpp_memcheck.sh" conda-python-cudf-tests: secrets: inherit @@ -63,7 +63,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit @@ -75,7 +75,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci:latest" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit From f0ba8598dd9792e137ca7aa3a1b22dbb84393cc1 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Thu, 21 Sep 2023 16:28:29 -0700 Subject: [PATCH 3/3] Pin to numpy<1.25 and numba<0.58 to avoid errors and deprecation warnings-as-errors. (#14156) Closes #14155. Related: #14160. (Will newer numpy support be backported to pandas 1.x? 
edit: no, see below) Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Benjamin Zaitlen (https://github.com/quasiben) - Ray Douglass (https://github.com/raydouglass) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/14156 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 4 ++-- conda/environments/all_cuda-120_arch-x86_64.yaml | 4 ++-- conda/recipes/cudf/meta.yaml | 6 ++++-- dependencies.yaml | 8 +++++--- python/cudf/pyproject.toml | 6 +++--- python/cudf_kafka/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 4 ++-- 7 files changed, 19 insertions(+), 15 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 692ba78f317..d4abc28cf13 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -55,8 +55,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.57 -- numpy>=1.21 +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 - numpydoc - nvcc_linux-64=11.8 - nvcomp==2.6.1 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index cf1bf4b8733..9a98e400e6d 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -54,8 +54,8 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.57 -- numpy>=1.21 +- numba>=0.57,<0.58 +- numpy>=1.21,<1.25 - numpydoc - nvcomp==2.6.1 - nvtx>=0.2.1 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index a909b72c878..54b687faa69 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -78,8 +78,10 @@ requirements: - typing_extensions >=4.0.0 - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - - numba >=0.57 - - numpy >=1.21 + # TODO: Pin to numba<0.58 until #14160 is resolved + - numba >=0.57,<0.58 + # TODO: Pin to numpy<1.25 until 
cudf requires pandas 2 + - numpy >=1.21,<1.25 - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf ={{ version }} - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/dependencies.yaml b/dependencies.yaml index 398ae193fe6..376e43094a7 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -259,7 +259,8 @@ dependencies: # Hard pin the patch version used during the build. This must be kept # in sync with the version pinned in get_arrow.cmake. - pyarrow==12.0.1.* - - numpy>=1.21 + # TODO: Pin to numpy<1.25 until cudf requires pandas 2 + - &numpy numpy>=1.21,<1.25 build_python: common: - output_types: [conda, requirements, pyproject] @@ -425,14 +426,15 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - - numpy>=1.21 + - *numpy - pandas>=1.3,<1.6.0dev0 run_cudf: common: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba numba>=0.57 + # TODO: Pin to numba<0.58 until #14160 is resolved + - &numba numba>=0.57,<0.58 - nvtx>=0.2.1 - packaging - rmm==23.10.* diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 574769f68d1..085d78afc7c 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -6,7 +6,7 @@ requires = [ "cmake>=3.26.4", "cython>=3.0.0", "ninja", - "numpy>=1.21", + "numpy>=1.21,<1.25", "protoc-wheel", "pyarrow==12.0.1.*", "rmm==23.10.*", @@ -31,8 +31,8 @@ dependencies = [ "cuda-python>=11.7.1,<12.0a0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numba>=0.57", - "numpy>=1.21", + "numba>=0.57,<0.58", + "numpy>=1.21,<1.25", "nvtx>=0.2.1", "packaging", "pandas>=1.3,<1.6.0dev0", diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index a6ef867451b..386cdc32ab1 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "cython>=3.0.0", - "numpy>=1.21", + "numpy>=1.21,<1.25", "pyarrow==12.0.1.*", "setuptools", "wheel", diff --git 
a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 2464abca71a..922da366422 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "dask>=2023.7.1", "distributed>=2023.7.1", "fsspec>=0.6.0", - "numpy>=1.21", + "numpy>=1.21,<1.25", "pandas>=1.3,<1.6.0dev0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -40,7 +40,7 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ "dask-cuda==23.10.*", - "numba>=0.57", + "numba>=0.57,<0.58", "pytest", "pytest-cov", "pytest-xdist",