Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose stream parameter in public strings find APIs #14060

Merged
merged 13 commits into from
Sep 21, 2023
Merged
100 changes: 58 additions & 42 deletions cpp/include/cudf/strings/find.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,21 @@ namespace strings {
*
* @throw cudf::logic_error if start position is greater than stop position.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param start First character position to include in the search.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param start First character position to include in the search
* @param stop Last position (exclusive) to include in the search.
* Default of -1 will search to the end of the string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New integer column with character position values.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New integer column with character position values
*/
std::unique_ptr<column> find(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
size_type start = 0,
size_type stop = -1,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -72,19 +74,21 @@ std::unique_ptr<column> find(
*
* @throw cudf::logic_error if start position is greater than stop position.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param start First position to include in the search.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param start First position to include in the search
* @param stop Last position (exclusive) to include in the search.
* Default of -1 will search starting at the end of the string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New integer column with character position values.
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New integer column with character position values
*/
std::unique_ptr<column> rfind(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
size_type start = 0,
size_type stop = -1,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down Expand Up @@ -123,37 +127,41 @@ std::unique_ptr<column> find(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> contains(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @brief Returns a column of boolean values for each string where true indicates
* the corresponding target string was found within that string in the provided column.
*
* The 'output[i] = true` if string `targets[i]` is found inside `strings[i]` otherwise
* The `output[i] = true` if string `targets[i]` is found inside `input[i]` otherwise
* `output[i] = false`.
* If `targets[i]` is an empty string, true is returned for `output[i]`.
* If `targets[i]` is null, false is returned for `output[i]`.
*
* Any null `strings[i]` row results in a null `output[i]` row.
* Any null string entries return corresponding null entries in the output columns.
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings column of targets to check row-wise in `strings`.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to check row-wise in `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> contains(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -166,14 +174,16 @@ std::unique_ptr<column> contains(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory.
davidwendt marked this conversation as resolved.
Show resolved Hide resolved
* @return New type_id::BOOL8 column.
*/
std::unique_ptr<column> starts_with(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -190,14 +200,16 @@ std::unique_ptr<column> starts_with(
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to compare row-wise against `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> starts_with(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -210,14 +222,16 @@ std::unique_ptr<column> starts_with(
*
* Any null string entries return corresponding null entries in the output columns.
*
* @param strings Strings instance for this operation.
* @param target UTF-8 encoded string to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param target UTF-8 encoded string to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> ends_with(
strings_column_view const& strings,
strings_column_view const& input,
string_scalar const& target,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -234,14 +248,16 @@ std::unique_ptr<column> ends_with(
*
* @throw cudf::logic_error if `input.size() != targets.size()`.
*
* @param strings Strings instance for this operation.
* @param targets Strings instance for this operation.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return New type_id::BOOL8 column.
* @param input Strings instance for this operation
* @param targets Strings column of targets to compare row-wise against `input`
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New BOOL8 column
*/
std::unique_ptr<column> ends_with(
strings_column_view const& strings,
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
/** @} */ // end of doxygen group
} // namespace strings
Expand Down
12 changes: 7 additions & 5 deletions cpp/include/cudf/strings/find_multiple.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2022, NVIDIA CORPORATION.
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -48,14 +48,16 @@ namespace strings {
*
* @throw cudf::logic_error if `targets` is empty or contains nulls
*
* @param input Strings instance for this operation.
* @param targets Strings to search for in each string.
* @param mr Device memory resource used to allocate the returned column's device memory.
* @return Lists column with character position values.
* @param input Strings instance for this operation
* @param targets Strings to search for in each string
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Lists column with character position values
*/
std::unique_ptr<column> find_multiple(
strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
2 changes: 2 additions & 0 deletions cpp/include/cudf/strings/findall.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,14 @@ struct regex_program;
*
* @param input Strings instance for this operation
* @param prog Regex program instance
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New lists column of strings
*/
std::unique_ptr<column> findall(
strings_column_view const& input,
regex_program const& prog,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/** @} */ // end of doxygen group
Expand Down
24 changes: 16 additions & 8 deletions cpp/src/strings/search/find.cu
Original file line number Diff line number Diff line change
Expand Up @@ -305,20 +305,22 @@ std::unique_ptr<column> find(strings_column_view const& strings,
string_scalar const& target,
size_type start,
size_type stop,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::find(strings, target, start, stop, cudf::get_default_stream(), mr);
return detail::find(strings, target, start, stop, stream, mr);
}

std::unique_ptr<column> rfind(strings_column_view const& strings,
string_scalar const& target,
size_type start,
size_type stop,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::rfind(strings, target, start, stop, cudf::get_default_stream(), mr);
return detail::rfind(strings, target, start, stop, stream, mr);
}

std::unique_ptr<column> find(strings_column_view const& input,
Expand Down Expand Up @@ -618,50 +620,56 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,

std::unique_ptr<column> contains(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::contains(strings, target, cudf::get_default_stream(), mr);
return detail::contains(strings, target, stream, mr);
}

std::unique_ptr<column> contains(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::contains(strings, targets, cudf::get_default_stream(), mr);
return detail::contains(strings, targets, stream, mr);
}

std::unique_ptr<column> starts_with(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::starts_with(strings, target, cudf::get_default_stream(), mr);
return detail::starts_with(strings, target, stream, mr);
}

std::unique_ptr<column> starts_with(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::starts_with(strings, targets, cudf::get_default_stream(), mr);
return detail::starts_with(strings, targets, stream, mr);
}

std::unique_ptr<column> ends_with(strings_column_view const& strings,
string_scalar const& target,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::ends_with(strings, target, cudf::get_default_stream(), mr);
return detail::ends_with(strings, target, stream, mr);
}

std::unique_ptr<column> ends_with(strings_column_view const& strings,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::ends_with(strings, targets, cudf::get_default_stream(), mr);
return detail::ends_with(strings, targets, stream, mr);
}

} // namespace strings
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/strings/search/find_multiple.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
results->set_null_count(0);

auto offsets = cudf::detail::sequence(strings_count + 1,
numeric_scalar<size_type>(0),
numeric_scalar<size_type>(targets_count),
numeric_scalar<size_type>(0, true, stream),
numeric_scalar<size_type>(targets_count, true, stream),
stream,
mr);
return make_lists_column(strings_count,
Expand All @@ -88,10 +88,11 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
// external API
std::unique_ptr<column> find_multiple(strings_column_view const& input,
strings_column_view const& targets,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::find_multiple(input, targets, cudf::get_default_stream(), mr);
return detail::find_multiple(input, targets, stream, mr);
}

} // namespace strings
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/strings/search/findall.cu
Original file line number Diff line number Diff line change
Expand Up @@ -134,10 +134,11 @@ std::unique_ptr<column> findall(strings_column_view const& input,

std::unique_ptr<column> findall(strings_column_view const& input,
regex_program const& prog,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
CUDF_FUNC_RANGE();
return detail::findall(input, prog, cudf::get_default_stream(), mr);
return detail::findall(input, prog, stream, mr);
}

} // namespace strings
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE t
ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing)
ConfigureTest(STREAM_STRINGS_TEST streams/strings/find_test.cpp STREAM_MODE testing)

# ##################################################################################################
# Install tests ####################################################################################
Expand Down
Loading