From da64c08ce3cbc026940f14db74267dee23776115 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 29 Oct 2024 13:59:05 -0400 Subject: [PATCH 1/2] Remove inputs with unsanitized nulls in reduction gtests --- cpp/tests/reductions/list_rank_test.cpp | 2 +- cpp/tests/reductions/rank_tests.cpp | 2 +- cpp/tests/reductions/reduction_tests.cpp | 12 +++++++++--- cpp/tests/reductions/scan_tests.cpp | 11 ++++++----- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp index 736b5081d8f..cb412f1e925 100644 --- a/cpp/tests/reductions/list_rank_test.cpp +++ b/cpp/tests/reductions/list_rank_test.cpp @@ -131,7 +131,7 @@ TEST_F(ListRankScanTest, ListOfStruct) false, false}}; auto col2 = cudf::test::strings_column_wrapper{ - {"x", "x", "a", "a", "b", "b", "a", "b", "a", "b", "a", "c", "a", "c", "a", "c", "b", "b"}, + {"x", "x", "a", "a", "b", "", "a", "b", "a", "b", "a", "c", "a", "c", "", "", "b", "b"}, {true, true, true, diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp index 19633211192..130458548fc 100644 --- a/cpp/tests/reductions/rank_tests.cpp +++ b/cpp/tests/reductions/rank_tests.cpp @@ -125,7 +125,7 @@ auto make_input_column() { if constexpr (std::is_same_v) { return cudf::test::strings_column_wrapper{ - {"0", "0", "4", "4", "4", "5", "7", "7", "7", "9", "9", "9"}, + {"0", "0", "4", "4", "4", "", "7", "7", "7", "9", "9", "9"}, cudf::test::iterators::null_at(5)}; } else { using fw_wrapper = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index c09cde8f9e4..72af9dc5694 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1255,6 +1255,12 @@ TEST_P(StringReductionTest, MinMax) // data and valid arrays std::vector host_strings(GetParam()); std::vector host_bools({true, false, true, true, true, true, false, false, true}); + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(host_strings.size()), + host_strings.begin(), + [host_strings, host_bools](auto idx) { + return host_bools[idx] ? host_strings[idx] : std::string{}; + }); bool succeed(true); std::string initial_value = "init"; @@ -1381,7 +1387,7 @@ TEST_F(StringReductionTest, AllNull) std::vector host_strings( {"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}); std::vector host_bools(host_strings.size(), false); - auto initial_value = cudf::make_string_scalar("init"); + auto initial_value = cudf::make_string_scalar(""); initial_value->set_valid_async(false); // string column with nulls @@ -3087,12 +3093,12 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, + "" /*null*/, "aaa" /*NULL*/, "zit", "bat", "aab", - "$1" /*null*/, + "" /*null*/, "€1" /*NULL*/, "wut"}, nulls_at({2, 7})}; diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 72d92c5ac53..121faccabc4 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -412,12 +412,13 @@ TEST_F(ScanStringsTest, MoreStringsMinMax) { int row_count = 512; - auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) { + auto validity = cudf::detail::make_counting_transform_iterator( + 0, [](auto idx) -> bool { return (idx % 23) != 22; }); + auto data_begin = cudf::detail::make_counting_transform_iterator(0, [validity](auto idx) { + if (validity[idx] == 0) return std::string{}; char const s = static_cast('a' + (idx % 26)); return std::string{1, s}; }); - auto validity = cudf::detail::make_counting_transform_iterator( - 0, [](auto idx) -> bool { return (idx % 23) != 22; }); cudf::test::strings_column_wrapper col(data_begin, data_begin + row_count, validity); thrust::host_vector v(data_begin, data_begin + row_count); @@ -625,12 +626,12 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*null*/, + "" /*null*/, "aaa" /*NULL*/, "zit", "bat", "aab", - "$1" /*null*/, + "" /*null*/, "€1" /*NULL*/, "wut"}, nulls_at({2, 7})}; From c9a6e6ef54919b64a02b58e747ca3c79acc29c00 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 30 Oct 2024 11:14:21 -0400 Subject: [PATCH 2/2] remove null/NULL distinction --- cpp/tests/reductions/reduction_tests.cpp | 21 ++++++---- cpp/tests/reductions/scan_tests.cpp | 53 ++++++++++++++---------- 2 files changed, 44 insertions(+), 30 deletions(-) diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 72af9dc5694..67083f19b3a 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -3088,21 +3088,28 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent NULL + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 121faccabc4..5f911597b02 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -621,21 +621,28 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; - // `null` means null at child column. - // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "" /*null*/, - "aaa" /*NULL*/, + "", // child null + "aaa", // parent null "zit", "bat", "aab", - "" /*null*/, - "€1" /*NULL*/, + "", // child null + "€1", // parent null "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, + 2, + 0, // child null + 4, // parent null + 5, + 6, + 7, + 0, // child null + 9, // parent null + 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); @@ -693,25 +700,25 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) auto const expected = [] { auto child1 = STRINGS_CW{{"año", "año", - "" /*null*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/}, + "", // child null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + "", // parent null + ""}, // parent null null_at(2)}; auto child2 = INTS_CW{{1, 1, - 0 /*null*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/}, + 0, // child null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0, // parent null + 0}, // parent null null_at(2)}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; }();