Skip to content

Commit

Permalink
Remove unsanitized nulls from input strings columns in reduction gtes…
Browse files Browse the repository at this point in the history
…ts (#17202)

Input strings column containing unsanitized nulls may result in undefined behavior.
This PR fixes the input data to not include string characters in null rows in gtests for `REDUCTION_TESTS`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #17202
  • Loading branch information
davidwendt authored Oct 31, 2024
1 parent a0711d0 commit 01cfcff
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 36 deletions.
2 changes: 1 addition & 1 deletion cpp/tests/reductions/list_rank_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ TEST_F(ListRankScanTest, ListOfStruct)
false,
false}};
auto col2 = cudf::test::strings_column_wrapper{
{"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"},
{"x", "x", "a", "a", "b", "", "a", "b", "a", "b", "a", "c", "a", "c", "", "", "b", "b"},
{true,
true,
true,
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/reductions/rank_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ auto make_input_column()
{
if constexpr (std::is_same_v<TypeParam, cudf::string_view>) {
return cudf::test::strings_column_wrapper{
{"0", "0", "4", "4", "4", "5", "7", "7", "7", "9", "9", "9"},
{"0", "0", "4", "4", "4", "", "7", "7", "7", "9", "9", "9"},
cudf::test::iterators::null_at(5)};
} else {
using fw_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam>;
Expand Down
29 changes: 21 additions & 8 deletions cpp/tests/reductions/reduction_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1255,6 +1255,12 @@ TEST_P(StringReductionTest, MinMax)
// data and valid arrays
std::vector<std::string> host_strings(GetParam());
std::vector<bool> host_bools({true, false, true, true, true, true, false, false, true});
std::transform(thrust::counting_iterator<std::size_t>(0),
thrust::counting_iterator<std::size_t>(host_strings.size()),
host_strings.begin(),
[host_strings, host_bools](auto idx) {
return host_bools[idx] ? host_strings[idx] : std::string{};
});
bool succeed(true);
std::string initial_value = "init";

Expand Down Expand Up @@ -1381,7 +1387,7 @@ TEST_F(StringReductionTest, AllNull)
std::vector<std::string> host_strings(
{"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"});
std::vector<bool> host_bools(host_strings.size(), false);
auto initial_value = cudf::make_string_scalar("init");
auto initial_value = cudf::make_string_scalar("");
initial_value->set_valid_async(false);

// string column with nulls
Expand Down Expand Up @@ -3082,21 +3088,28 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
using cudf::test::iterators::null_at;
using cudf::test::iterators::nulls_at;

// `null` means null at child column.
// `NULL` means null at parent column.
auto const input = [] {
auto child1 = STRINGS_CW{{"año",
"bit",
"₹1" /*null*/,
"aaa" /*NULL*/,
"", // child null
"aaa", // parent null
"zit",
"bat",
"aab",
"$1" /*null*/,
"€1" /*NULL*/,
"", // child null
"€1", // parent null
"wut"},
nulls_at({2, 7})};
auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10},
auto child2 = INTS_CW{{1,
2,
0, // child null
4, // parent null
5,
6,
7,
0, // child null
9, // parent NULL
10},
nulls_at({2, 7})};
return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})};
}();
Expand Down
60 changes: 34 additions & 26 deletions cpp/tests/reductions/scan_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,12 +412,13 @@ TEST_F(ScanStringsTest, MoreStringsMinMax)
{
int row_count = 512;

auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) {
auto validity = cudf::detail::make_counting_transform_iterator(
0, [](auto idx) -> bool { return (idx % 23) != 22; });
auto data_begin = cudf::detail::make_counting_transform_iterator(0, [validity](auto idx) {
if (validity[idx] == 0) return std::string{};
char const s = static_cast<char>('a' + (idx % 26));
return std::string{1, s};
});
auto validity = cudf::detail::make_counting_transform_iterator(
0, [](auto idx) -> bool { return (idx % 23) != 22; });
cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity);

thrust::host_vector<std::string> v(data_begin, data_begin + row_count);
Expand Down Expand Up @@ -620,21 +621,28 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
using cudf::test::iterators::null_at;
using cudf::test::iterators::nulls_at;

// `null` means null at child column.
// `NULL` means null at parent column.
auto const input = [] {
auto child1 = STRINGS_CW{{"año",
"bit",
"₹1" /*null*/,
"aaa" /*NULL*/,
"", // child null
"aaa", // parent null
"zit",
"bat",
"aab",
"$1" /*null*/,
"€1" /*NULL*/,
"", // child null
"€1", // parent null
"wut"},
nulls_at({2, 7})};
auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10},
auto child2 = INTS_CW{{1,
2,
0, // child null
4, // parent null
5,
6,
7,
0, // child null
9, // parent null
10},
nulls_at({2, 7})};
return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})};
}();
Expand Down Expand Up @@ -692,25 +700,25 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
auto const expected = [] {
auto child1 = STRINGS_CW{{"año",
"año",
"" /*null*/,
"" /*NULL*/,
"" /*NULL*/,
"" /*NULL*/,
"" /*NULL*/,
"" /*NULL*/,
"" /*NULL*/,
"" /*NULL*/},
"", // child null
"", // parent null
"", // parent null
"", // parent null
"", // parent null
"", // parent null
"", // parent null
""}, // parent null
null_at(2)};
auto child2 = INTS_CW{{1,
1,
0 /*null*/,
0 /*NULL*/,
0 /*NULL*/,
0 /*NULL*/,
0 /*NULL*/,
0 /*NULL*/,
0 /*NULL*/,
0 /*NULL*/},
0, // child null
0, // parent null
0, // parent null
0, // parent null
0, // parent null
0, // parent null
0, // parent null
0}, // parent null
null_at(2)};
return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})};
}();
Expand Down

0 comments on commit 01cfcff

Please sign in to comment.