Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-23.10' into bug/df/colu…
Browse files Browse the repository at this point in the history
…mn_mismatch
  • Loading branch information
mroeschke committed Sep 22, 2023
2 parents 8fd3b09 + f0ba859 commit 15d3b2d
Show file tree
Hide file tree
Showing 19 changed files with 578 additions and 121 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
needs: conda-python-build
Expand All @@ -83,7 +83,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
Expand All @@ -93,7 +93,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-cudf:
needs: checks
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_cpp_memcheck.sh"
conda-python-cudf-tests:
secrets: inherit
Expand Down Expand Up @@ -63,7 +63,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
Expand All @@ -75,7 +75,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci:latest"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numba>=0.57,<0.58
- numpy>=1.21,<1.25
- numpydoc
- nvcc_linux-64=11.8
- nvcomp==2.6.1
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba>=0.57
- numpy>=1.21
- numba>=0.57,<0.58
- numpy>=1.21,<1.25
- numpydoc
- nvcomp==2.6.1
- nvtx>=0.2.1
Expand Down
6 changes: 4 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ requirements:
- typing_extensions >=4.0.0
- pandas >=1.3,<1.6.0dev0
- cupy >=12.0.0
- numba >=0.57
- numpy >=1.21
# TODO: Pin to numba<0.58 until #14160 is resolved
- numba >=0.57,<0.58
# TODO: Pin to numpy<1.25 until cudf requires pandas 2
- numpy >=1.21,<1.25
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- libcudf ={{ version }}
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
24 changes: 22 additions & 2 deletions cpp/include/cudf/rolling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,30 @@ struct window_bounds {
* column of the same type as the input. Therefore it is suggested to convert integer column types
* (especially low-precision integers) to `FLOAT32` or `FLOAT64` before doing a rolling `MEAN`.
*
* Note: `preceding_window` and `following_window` could well have negative values. This yields
* windows where the current row might not be included at all. For instance, consider a window
* defined as (preceding=3, following=-1). This produces a window from 2 (i.e. 3-1) rows preceding
* the current row, and 1 row *preceding* the current row. For the example above, the window for
* row#3 is:
*
* [ 10, 20, 10, 50, 60, 20, 30, 80, 40 ]
* <--window--> ^
* |
* current_row
*
* Similarly, `preceding` could have a negative value, indicating that the window begins at a
* position after the current row. It differs slightly from the semantics for `following`, because
* `preceding` includes the current row. Therefore:
* 1. preceding=1 => Window starts at the current row.
* 2. preceding=0 => Window starts at 1 past the current row.
* 3. preceding=-1 => Window starts at 2 past the current row. Etc.
*
* @param[in] group_keys The (pre-sorted) grouping columns
* @param[in] input The input column (to be aggregated)
* @param[in] preceding_window The static rolling window size in the backward direction
* @param[in] following_window The static rolling window size in the forward direction
* @param[in] preceding_window The static rolling window size in the backward direction (for
* positive values), or forward direction (for negative values)
* @param[in] following_window The static rolling window size in the forward direction (for positive
* values), or backward direction (for negative values)
* @param[in] min_periods Minimum number of observations in window required to have a value,
* otherwise element `i` is null.
* @param[in] aggr The rolling window aggregation type (SUM, MAX, MIN, etc.)
Expand Down
33 changes: 23 additions & 10 deletions cpp/src/rolling/detail/rolling.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,22 @@ namespace cudf {

namespace detail {

namespace { // anonymous
/// Helper function to materialize preceding/following offsets.
template <typename Calculator>
std::unique_ptr<column> expand_to_column(Calculator const& calc,
size_type const& num_rows,
rmm::cuda_stream_view stream)
{
auto window_column = cudf::make_numeric_column(
cudf::data_type{type_to_id<size_type>()}, num_rows, cudf::mask_state::UNALLOCATED, stream);

auto begin = cudf::detail::make_counting_transform_iterator(0, calc);

thrust::copy_n(
rmm::exec_policy(stream), begin, num_rows, window_column->mutable_view().data<size_type>());

return window_column;
}

/**
* @brief Operator for applying a generic (non-specialized) rolling aggregation on a single window.
Expand All @@ -91,14 +106,14 @@ struct DeviceRolling {

// operations we do support
template <typename T = InputType, aggregation::Kind O = op>
DeviceRolling(size_type _min_periods, std::enable_if_t<is_supported<T, O>()>* = nullptr)
explicit DeviceRolling(size_type _min_periods, std::enable_if_t<is_supported<T, O>()>* = nullptr)
: min_periods(_min_periods)
{
}

// operations we don't support
template <typename T = InputType, aggregation::Kind O = op>
DeviceRolling(size_type _min_periods, std::enable_if_t<!is_supported<T, O>()>* = nullptr)
explicit DeviceRolling(size_type _min_periods, std::enable_if_t<!is_supported<T, O>()>* = nullptr)
: min_periods(_min_periods)
{
CUDF_FAIL("Invalid aggregation/type pair");
Expand All @@ -111,7 +126,7 @@ struct DeviceRolling {
mutable_column_device_view& output,
size_type start_index,
size_type end_index,
size_type current_index)
size_type current_index) const
{
using AggOp = typename corresponding_operator<op>::type;
AggOp agg_op;
Expand Down Expand Up @@ -144,7 +159,7 @@ struct DeviceRolling {
template <typename InputType, aggregation::Kind op>
struct DeviceRollingArgMinMaxBase {
size_type min_periods;
DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {}
explicit DeviceRollingArgMinMaxBase(size_type _min_periods) : min_periods(_min_periods) {}

static constexpr bool is_supported()
{
Expand All @@ -162,7 +177,7 @@ struct DeviceRollingArgMinMaxBase {
*/
template <aggregation::Kind op>
struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase<cudf::string_view, op> {
DeviceRollingArgMinMaxString(size_type _min_periods)
explicit DeviceRollingArgMinMaxString(size_type _min_periods)
: DeviceRollingArgMinMaxBase<cudf::string_view, op>(_min_periods)
{
}
Expand Down Expand Up @@ -461,8 +476,8 @@ struct agg_specific_empty_output {
}
};

std::unique_ptr<column> empty_output_for_rolling_aggregation(column_view const& input,
rolling_aggregation const& agg)
static std::unique_ptr<column> empty_output_for_rolling_aggregation(column_view const& input,
rolling_aggregation const& agg)
{
// TODO:
// Ideally, for UDF aggregations, the returned column would match
Expand Down Expand Up @@ -1215,8 +1230,6 @@ struct dispatch_rolling {
}
};

} // namespace

// Applies a user-defined rolling window function to the values in a column.
template <typename PrecedingWindowIterator, typename FollowingWindowIterator>
std::unique_ptr<column> rolling_window_udf(column_view const& input,
Expand Down
30 changes: 19 additions & 11 deletions cpp/src/rolling/detail/rolling_fixed_window.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,8 +19,9 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/utilities/default_stream.hpp>

#include <cudf_test/column_utilities.hpp>

#include <thrust/extrema.h>
#include <thrust/iterator/constant_iterator.h>

namespace cudf::detail {

Expand All @@ -43,6 +44,9 @@ std::unique_ptr<column> rolling_window(column_view const& input,
CUDF_EXPECTS((default_outputs.is_empty() || default_outputs.size() == input.size()),
"Defaults column must be either empty or have as many rows as the input column.");

CUDF_EXPECTS(-(preceding_window - 1) <= following_window,
"Preceding window bounds must precede the following window bounds.");

if (agg.kind == aggregation::CUDA || agg.kind == aggregation::PTX) {
// TODO: In future, might need to clamp preceding/following to column boundaries.
return cudf::detail::rolling_window_udf(input,
Expand All @@ -58,18 +62,22 @@ std::unique_ptr<column> rolling_window(column_view const& input,
// Clamp preceding/following to column boundaries.
// E.g. If preceding_window == 2, then for a column of 5 elements, preceding_window will be:
// [1, 2, 2, 2, 1]
auto const preceding_window_begin = cudf::detail::make_counting_transform_iterator(
0,
[preceding_window] __device__(size_type i) { return thrust::min(i + 1, preceding_window); });
auto const following_window_begin = cudf::detail::make_counting_transform_iterator(
0, [col_size = input.size(), following_window] __device__(size_type i) {
return thrust::min(col_size - i - 1, following_window);
});

auto const preceding_calc = [preceding_window] __device__(size_type i) {
return thrust::min(i + 1, preceding_window);
};

auto const following_calc = [col_size = input.size(),
following_window] __device__(size_type i) {
return thrust::min(col_size - i - 1, following_window);
};

auto const preceding_column = expand_to_column(preceding_calc, input.size(), stream);
auto const following_column = expand_to_column(following_calc, input.size(), stream);
return cudf::detail::rolling_window(input,
default_outputs,
preceding_window_begin,
following_window_begin,
preceding_column->view().begin<cudf::size_type>(),
following_column->view().begin<cudf::size_type>(),
min_periods,
agg,
stream,
Expand Down
Loading

0 comments on commit 15d3b2d

Please sign in to comment.