From 8c0ea2313776497e3eae722c6a29710cc38b6a37 Mon Sep 17 00:00:00 2001 From: nvauto <70000568+nvauto@users.noreply.github.com> Date: Tue, 24 Sep 2024 07:23:45 +0000 Subject: [PATCH 001/157] Init version 24.12.0-SNAPSHOT Change submodule to 24.12.0-SNAPSHOT Signed-off-by: nvauto <70000568+nvauto@users.noreply.github.com> --- .gitmodules | 2 +- CONTRIBUTING.md | 2 +- pom.xml | 2 +- src/main/cpp/CMakeLists.txt | 2 +- thirdparty/cudf | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitmodules b/.gitmodules index e2001c2c84..93d7670bbf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "thirdparty/cudf"] path = thirdparty/cudf url = https://github.com/rapidsai/cudf.git - branch = branch-24.10 + branch = branch-24.12 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 271e62feb1..bae978da31 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -165,7 +165,7 @@ $ ./build/build-in-docker install ... ``` Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from -[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.10/CONTRIBUTING.md#building-from-source). +[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.12/CONTRIBUTING.md#building-from-source). 
```bash $ ./build/buildall diff --git a/pom.xml b/pom.xml index 50da08d178..ea72dbd5f5 100644 --- a/pom.xml +++ b/pom.xml @@ -21,7 +21,7 @@ com.nvidia spark-rapids-jni - 24.10.0-SNAPSHOT + 24.12.0-SNAPSHOT jar RAPIDS Accelerator JNI for Apache Spark diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 3ee308550f..33066b6471 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -51,7 +51,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI) project( SPARK_RAPIDS_JNI - VERSION 24.10.00 + VERSION 24.12.00 LANGUAGES C CXX CUDA ) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8b12cf4e66..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8b12cf4e66b4b1f8ec248493c27deb65ee625bbf +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea From 59dc583fe3fceb0dac2bc7d146d419a73b7f5282 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:34:21 +0000 Subject: [PATCH 002/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 6badd6b183..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6badd6b183e966f7f882708a0f4b2c4d0f2b5368 +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea From 9392c4f10bc963f512b8e1532b13ef3a5ce348c8 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 24 Sep 2024 14:50:50 +0000 Subject: [PATCH 003/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 6badd6b183..99e282dc3e 160000 --- a/thirdparty/cudf 
+++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6badd6b183e966f7f882708a0f4b2c4d0f2b5368 +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea From dd12265f7f910bb48c92820d8e7ccc4f72bd67b1 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 25 Sep 2024 02:50:50 +0000 Subject: [PATCH 004/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 73fa557186..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 73fa557186932fa867a0516f8947bb25b97d0f29 +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4a43768aa3..b66514a04a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "3e0509f389c300781715c0c6f30a6fe2eb03c1e7", + "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.10" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.10" }, From a66ce16dff1a4156180a564e68aea2e2f50f1c60 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 25 Sep 2024 08:31:25 +0000 Subject: [PATCH 005/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 
3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 03c77c2176..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 03c77c2176ee5f30ef3d10b9332ad9c3612db905 +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4a43768aa3..b66514a04a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "3e0509f389c300781715c0c6f30a6fe2eb03c1e7", + "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.10" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.10" }, From c1cade8015add3b8fd69a460e63c8981206e2a0e Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 25 Sep 2024 14:31:30 +0000 Subject: [PATCH 006/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dbe5528706..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dbe5528706b309a9a21f34e948c22c1c4de9caff +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4a43768aa3..b66514a04a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - 
"git_tag" : "3e0509f389c300781715c0c6f30a6fe2eb03c1e7", + "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.10" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.10" }, From db536d779d56bed1c30be62c370e1ad263898a72 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:08:07 +0000 Subject: [PATCH 007/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dbe5528706..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dbe5528706b309a9a21f34e948c22c1c4de9caff +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4a43768aa3..b66514a04a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "3e0509f389c300781715c0c6f30a6fe2eb03c1e7", + "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.10" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.10" }, From cc04d59c3a76657a2440d4410b37d135c86b47c6 Mon Sep 17 00:00:00 2001 From: spark-rapids automation 
<70000568+nvauto@users.noreply.github.com> Date: Wed, 25 Sep 2024 20:31:47 +0000 Subject: [PATCH 008/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8e784243c4..99e282dc3e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8e784243c48e8420b7a75790fb42fc0ffbf6896a +Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4c68db0220..b66514a04a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "44b3f97a20db697adbcde2c222a2174561c12ddd", + "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.10" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.10" }, From 8c95535c533dfd74cdfd43fafb9dba0a31456798 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:27:35 +0800 Subject: [PATCH 009/157] Update submodule cudf to d1b411a273486c0e4205384589d33372b6e32a59 (#2438) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 99e282dc3e..d1b411a273 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ 
-Subproject commit 99e282dc3efa702e451b09f7c8c9849d309affea +Subproject commit d1b411a273486c0e4205384589d33372b6e32a59 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index b66514a04a..b0c91c81b6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "42e0d727e4044f7941a62e97d8f68fd14c24d02f", + "git_tag" : "638d53dfbecb64c83bcd730becff8640e0c8e1e3", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "99e237ef6a42321a6bbd28b7aab9e4cc4105e6a3", + "git_tag" : "163db70d4247314aaa9fe6cdbacf03f8d65954ff", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From eaa331e6cba98ab0a14ac57ee1f7a85f27aa275e Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 26 Sep 2024 03:31:17 +0000 Subject: [PATCH 010/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 12ee360048..d1b411a273 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 12ee360048473ddd06019090c7d19c67d6959f7a +Subproject commit d1b411a273486c0e4205384589d33372b6e32a59 diff 
--git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4c68db0220..b0c91c81b6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "44b3f97a20db697adbcde2c222a2174561c12ddd", + "git_tag" : "638d53dfbecb64c83bcd730becff8640e0c8e1e3", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "163db70d4247314aaa9fe6cdbacf03f8d65954ff", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 0d54865b7dcddfbe9ead3a407c2255ca09a412b9 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 26 Sep 2024 04:35:38 +0000 Subject: [PATCH 011/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b00a718a79..d1b411a273 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b00a718a7980fadc91c8b37d6bbe829e4b8549e8 +Subproject commit d1b411a273486c0e4205384589d33372b6e32a59 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- 
a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4c68db0220..b0c91c81b6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "44b3f97a20db697adbcde2c222a2174561c12ddd", + "git_tag" : "638d53dfbecb64c83bcd730becff8640e0c8e1e3", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "163db70d4247314aaa9fe6cdbacf03f8d65954ff", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 206a1a3343d018939b7593391ee4bc174aac7d25 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 26 Sep 2024 22:38:23 +0800 Subject: [PATCH 012/157] Update submodule cudf to 6b3d57d33f9de725da86eedb67f3debb6f2d41b8 (#2443) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d1b411a273..6b3d57d33f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d1b411a273486c0e4205384589d33372b6e32a59 +Subproject commit 6b3d57d33f9de725da86eedb67f3debb6f2d41b8 From ab68b8d874dacffdf7305cb32032ce9195c7d319 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 27 Sep 2024 04:33:59 +0800 Subject: [PATCH 013/157] Update submodule cudf to 5f1396ae59e13831d11d822833b2ecf36a471328 (#2444) Signed-off-by: spark-rapids automation 
<70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 6b3d57d33f..5f1396ae59 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6b3d57d33f9de725da86eedb67f3debb6f2d41b8 +Subproject commit 5f1396ae59e13831d11d822833b2ecf36a471328 From c493564b17372879f0e4d124ed5128b7dee04df9 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 26 Sep 2024 22:08:44 +0000 Subject: [PATCH 014/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 742eaadb92..5f1396ae59 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 742eaadb92b0c5159d92be49e647a17e8c1d0b9b +Subproject commit 5f1396ae59e13831d11d822833b2ecf36a471328 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4c68db0220..b0c91c81b6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "44b3f97a20db697adbcde2c222a2174561c12ddd", + "git_tag" : "638d53dfbecb64c83bcd730becff8640e0c8e1e3", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, 
"git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "163db70d4247314aaa9fe6cdbacf03f8d65954ff", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 2f80bb1662b6bb003d128cb7e752ec433261bbeb Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 27 Sep 2024 10:33:08 +0800 Subject: [PATCH 015/157] Update submodule cudf to 9125d2f19ecd6a82f29cdb41928737ec73eb491b (#2447) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 5f1396ae59..9125d2f19e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 5f1396ae59e13831d11d822833b2ecf36a471328 +Subproject commit 9125d2f19ecd6a82f29cdb41928737ec73eb491b From e2a42df95bfe4f40fed62e3f14af23e7fa5af668 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:45:39 +0800 Subject: [PATCH 016/157] Update submodule cudf to 0632538a69f55f6d489d306edf2910a111430425 (#2448) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9125d2f19e..0632538a69 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9125d2f19ecd6a82f29cdb41928737ec73eb491b +Subproject commit 0632538a69f55f6d489d306edf2910a111430425 From 59e5085178f7e7c80dd9c30b2419842c310cc4a4 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 28 Sep 2024 05:19:29 +0800 Subject: [PATCH 017/157] Update submodule cudf to 22d481a4e3a34d517ad9a9ac46b8b1b456d365c6 (#2451) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- 
thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 0632538a69..22d481a4e3 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 0632538a69f55f6d489d306edf2910a111430425 +Subproject commit 22d481a4e3a34d517ad9a9ac46b8b1b456d365c6 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index b0c91c81b6..e5938f9929 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "163db70d4247314aaa9fe6cdbacf03f8d65954ff", + "git_tag" : "b51447393c523cc929608d84850c70a3eae08af3", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From fdfb626884c95bccbf06e578c63a5d4868870129 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 28 Sep 2024 10:48:54 +0800 Subject: [PATCH 018/157] Update submodule cudf to 6973ef806bc9d3cbda37a4c7caa763da12b84b7f (#2452) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 22d481a4e3..6973ef806b 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 22d481a4e3a34d517ad9a9ac46b8b1b456d365c6 +Subproject commit 6973ef806bc9d3cbda37a4c7caa763da12b84b7f From c262911d055155ec936f7a05fc223ce3f7dd755f Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:32:16 +0800 Subject: [PATCH 019/157] Update submodule cudf to e2bcbb880f540987eb3fbd0fede9fed826ea2fdf (#2453) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf 
b/thirdparty/cudf index 6973ef806b..e2bcbb880f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6973ef806bc9d3cbda37a4c7caa763da12b84b7f +Subproject commit e2bcbb880f540987eb3fbd0fede9fed826ea2fdf From 16307ab80c30ff02da7bb8497027639d5554edb4 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 30 Sep 2024 20:31:28 +0800 Subject: [PATCH 020/157] Update submodule cudf to 9b2f892c5ec59605bfdc3a2abe4885176950589a (#2454) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e2bcbb880f..9b2f892c5e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e2bcbb880f540987eb3fbd0fede9fed826ea2fdf +Subproject commit 9b2f892c5ec59605bfdc3a2abe4885176950589a diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index e5938f9929..3740a1fc97 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "638d53dfbecb64c83bcd730becff8640e0c8e1e3", + "git_tag" : "3e0aabe94d93e49ec9b78ce392c3f204882c2cd5", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 3a3220d7366de80507b21cb9c671aa6248c2a5f4 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Mon, 30 Sep 2024 22:11:49 +0000 Subject: [PATCH 021/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 
f20491d336..9b2f892c5e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f20491d3366808e5c62dcee2160fc8a9d5e50fa7 +Subproject commit 9b2f892c5ec59605bfdc3a2abe4885176950589a diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 8a808aa4bc..3740a1fc97 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "2053638989525e18157cb5cad0f4414a6662187a", + "git_tag" : "3e0aabe94d93e49ec9b78ce392c3f204882c2cd5", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "b51447393c523cc929608d84850c70a3eae08af3", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 234751881978b15de6a02f434175e29f8c866894 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 1 Oct 2024 10:30:24 +0800 Subject: [PATCH 022/157] Update submodule cudf to 04baa225ca78de5717c50127bd5f77736f912930 (#2458) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 9b2f892c5e..04baa225ca 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 9b2f892c5ec59605bfdc3a2abe4885176950589a 
+Subproject commit 04baa225ca78de5717c50127bd5f77736f912930 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 3740a1fc97..9f92ab8ac2 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "b51447393c523cc929608d84850c70a3eae08af3", + "git_tag" : "9e410c0591f38aa6c0a17c4e2c2edc4f6bfed058", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From b2a411a602c1bf834421525dc4750dbc20a88495 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Oct 2024 04:32:18 +0800 Subject: [PATCH 023/157] Update submodule cudf to 69dc356a5dd72232a6f4c8dac89432bfcdc0326b (#2459) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 04baa225ca..69dc356a5d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 04baa225ca78de5717c50127bd5f77736f912930 +Subproject commit 69dc356a5dd72232a6f4c8dac89432bfcdc0326b diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 9f92ab8ac2..bd72ae2fe6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "3e0aabe94d93e49ec9b78ce392c3f204882c2cd5", + "git_tag" : "b2592d493ec00b799c93a88b3328d07ce2be390d", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 17effee36ae7ac7576c7b88f82c65f349d101fab Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:38:57 +0800 Subject: [PATCH 024/157] Update submodule cudf to dae9d6899dd722c52bd42dd0fee51f4a6b336c93 (#2461) 
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 69dc356a5d..dae9d6899d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 69dc356a5dd72232a6f4c8dac89432bfcdc0326b +Subproject commit dae9d6899dd722c52bd42dd0fee51f4a6b336c93 From 26e4e3016754d44596ce9e530e58c1ed4af6dd68 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Oct 2024 17:20:08 +0800 Subject: [PATCH 025/157] Update submodule cudf to bac81cb8f4c61c9a81e30e79d03c323406bf657a (#2462) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dae9d6899d..bac81cb8f4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dae9d6899dd722c52bd42dd0fee51f4a6b336c93 +Subproject commit bac81cb8f4c61c9a81e30e79d03c323406bf657a From dce30650e4dd1339466afdf8998b07387d8b9d3a Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 2 Oct 2024 22:05:24 +0000 Subject: [PATCH 026/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8a9df040e1..bac81cb8f4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8a9df040e18b2f54df67ad6fde94969990e61b7f +Subproject commit bac81cb8f4c61c9a81e30e79d03c323406bf657a diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index caace92d41..6936d8fd01 100644 --- 
a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -3815fab6439e911530a5e0338d1fa564b6d3443b +2413be67c97acf57a651481467815036570d11ee diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 3726d157ea..bd72ae2fe6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "03fbb4593a599431012a7ceac39df6a43f3ebf90", + "git_tag" : "b2592d493ec00b799c93a88b3328d07ce2be390d", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -141,9 +141,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "ab6e2961d7b8f833f688775e941c4e2ed2bd4d8a", + "git_tag" : "9e410c0591f38aa6c0a17c4e2c2edc4f6bfed058", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From ae24b4dbf202efb3faa1a1c36beadaaf4b1728b0 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 3 Oct 2024 16:02:56 -0500 Subject: [PATCH 027/157] Add HostTable interface to allow wielding of host tables in native code (#2393) Signed-off-by: Jason Lowe --- src/main/cpp/CMakeLists.txt | 1 + src/main/cpp/src/HostTableJni.cpp | 249 ++++++++++++++ src/main/cpp/src/host_table_view.hpp | 320 ++++++++++++++++++ .../nvidia/spark/rapids/jni/HostTable.java | 190 +++++++++++ .../spark/rapids/jni/HostTableTest.java | 153 +++++++++ 5 files changed, 913 insertions(+) create mode 100644 src/main/cpp/src/HostTableJni.cpp create mode 100644 src/main/cpp/src/host_table_view.hpp create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/HostTable.java create mode 100644 src/test/java/com/nvidia/spark/rapids/jni/HostTableTest.java diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 33066b6471..1ce648db3d 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ 
-194,6 +194,7 @@ add_library( src/GpuTimeZoneDBJni.cpp src/HashJni.cpp src/HistogramJni.cpp + src/HostTableJni.cpp src/JSONUtilsJni.cpp src/NativeParquetJni.cpp src/ParseURIJni.cpp diff --git a/src/main/cpp/src/HostTableJni.cpp b/src/main/cpp/src/HostTableJni.cpp new file mode 100644 index 0000000000..0959f35e57 --- /dev/null +++ b/src/main/cpp/src/HostTableJni.cpp @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cudf_jni_apis.hpp" +#include "host_table_view.hpp" + +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace { + +using spark_rapids_jni::host_column_view; +using spark_rapids_jni::host_table_view; + +// Padding sizes to 64-byte for compatibility with Arrow +std::size_t pad_size(std::size_t size) { return rmm::align_up(size, 64); } + +// Determine the size of buffer needed to hold just the data portion of this column. +// This does not include validity, offsets, or any child columns. 
+std::size_t get_data_size(cudf::column_view const& c, cudaStream_t stream) +{ + auto dtype = c.type(); + if (cudf::is_fixed_width(dtype)) { + return cudf::size_of(dtype) * c.size(); + } else if (dtype.id() == cudf::type_id::STRING) { + auto scv = cudf::strings_column_view(c); + return scv.chars_size(stream); + } else { + throw std::runtime_error(std::string("unexpected data type: ") + + std::to_string(static_cast(dtype.id()))); + } +} + +// Determine the size of buffer needed to hold all of the data for a column. +// This includes validity, data, offsets, and child columns. +std::size_t column_size(cudf::column_view const& c, cudaStream_t stream) +{ + std::size_t size = 0; + if (c.data() != nullptr) { size += pad_size(get_data_size(c, stream)); } + if (c.has_nulls()) { size += cudf::bitmask_allocation_size_bytes(c.size()); } + return std::accumulate(c.child_begin(), + c.child_end(), + size, + [stream](std::size_t sum, cudf::column_view const& child) { + return sum + column_size(child, stream); + }); +} + +// Determine the size of buffer needed to hold all of the data for a table. 
+std::size_t host_buffer_size(cudf::table_view const& t, cudaStream_t stream) +{ + std::size_t s = 0; + return std::accumulate( + t.begin(), t.end(), s, [stream](std::size_t sum, cudf::column_view const& c) { + return sum + column_size(c, stream); + }); +} + +uint8_t* copy_to_host_async( + void const* src, uint8_t* dest, std::size_t size, uint8_t const* dest_end, cudaStream_t stream) +{ + if (dest + size > dest_end) { throw std::runtime_error("buffer overflow"); } + CUDF_CUDA_TRY(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream)); + return dest + size; +} + +uint8_t* build_host_column_view_async(cudf::column_view const& dev_col, + uint8_t* bp, + uint8_t const* bp_end, + cudaStream_t stream, + std::vector& host_cols) +{ + void const* host_data = nullptr; + void const* dev_data = dev_col.data(); + if (dev_data != nullptr) { + host_data = bp; + auto data_size = get_data_size(dev_col, stream); + auto padded_bp_end = bp + pad_size(data_size); + bp = copy_to_host_async(dev_data, bp, data_size, bp_end, stream); + while (bp != padded_bp_end) { + *bp++ = 0; + } + } + cudf::bitmask_type const* host_null_mask = nullptr; + if (dev_col.has_nulls()) { + host_null_mask = reinterpret_cast(bp); + auto mask_size = cudf::bitmask_allocation_size_bytes(dev_col.size()); + bp = copy_to_host_async(dev_col.null_mask(), bp, mask_size, bp_end, stream); + } + std::vector children; + children.reserve(dev_col.num_children()); + std::for_each(dev_col.child_begin(), dev_col.child_end(), [&](cudf::column_view const& child) { + bp = build_host_column_view_async(child, bp, bp_end, stream, children); + }); + host_cols.push_back(host_column_view( + dev_col.type(), dev_col.size(), host_data, host_null_mask, dev_col.null_count(), children)); + return bp; +} + +std::unique_ptr to_host_table_async(cudf::table_view const& dev_table, + uint8_t* buffer, + std::size_t buffer_size, + cudaStream_t stream) +{ + uint8_t* bp = buffer; + uint8_t const* buffer_end = buffer + buffer_size; + std::vector 
cols; + cols.reserve(dev_table.num_columns()); + std::for_each(dev_table.begin(), dev_table.end(), [&](cudf::column_view const& dev_col) { + bp = build_host_column_view_async(dev_col, bp, buffer_end, stream, cols); + }); + return std::make_unique(cols); +} + +cudf::column_view to_device_column(host_column_view const& host_col, jlong host_to_dev_offset) +{ + auto data = host_col.data(); + if (data != nullptr) { data += host_to_dev_offset; } + auto mask = host_col.null_mask(); + if (mask != nullptr) { mask += host_to_dev_offset / sizeof(*mask); } + std::vector children; + std::transform(host_col.child_begin(), + host_col.child_end(), + std::back_inserter(children), + [host_to_dev_offset](host_column_view const& c) { + return to_device_column(c, host_to_dev_offset); + }); + return cudf::column_view( + host_col.type(), host_col.size(), data, mask, host_col.null_count(), 0, children); +} + +std::vector> to_device_column_views( + host_table_view const& host_table, jlong host_to_dev_offset) +{ + std::vector> cv_ptrs; + cv_ptrs.reserve(host_table.num_columns()); + std::transform( + host_table.begin(), + host_table.end(), + std::back_inserter(cv_ptrs), + [host_to_dev_offset](host_column_view const& host_col) { + return std::make_unique(to_device_column(host_col, host_to_dev_offset)); + }); + return cv_ptrs; +} + +} // anonymous namespace + +extern "C" { + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_HostTable_bufferSize(JNIEnv* env, + jclass, + jlong table_handle, + jlong jstream) +{ + JNI_NULL_CHECK(env, table_handle, "table is null", 0); + try { + cudf::jni::auto_set_device(env); + auto t = reinterpret_cast(table_handle); + auto stream = reinterpret_cast(jstream); + return static_cast(host_buffer_size(*t, stream)); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_HostTable_copyFromTableAsync( + JNIEnv* env, jclass, jlong table_handle, jlong host_address, jlong host_size, jlong jstream) +{ + JNI_NULL_CHECK(env, 
table_handle, "table is null", 0); + try { + cudf::jni::auto_set_device(env); + auto table = reinterpret_cast(table_handle); + auto buffer = reinterpret_cast(host_address); + auto buffer_size = static_cast(host_size); + auto stream = reinterpret_cast(jstream); + auto host_table_view = to_host_table_async(*table, buffer, buffer_size, stream); + return reinterpret_cast(host_table_view.release()); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_HostTable_toDeviceColumnViews( + JNIEnv* env, jclass, jlong table_handle, jlong host_to_dev_offset) +{ + JNI_NULL_CHECK(env, table_handle, "table is null", nullptr); + JNI_ARG_CHECK( + env, host_to_dev_offset % sizeof(cudf::bitmask_type) == 0, "invalid offset", nullptr); + try { + cudf::jni::auto_set_device(env); + auto host_table = reinterpret_cast(table_handle); + auto column_view_ptrs = to_device_column_views(*host_table, host_to_dev_offset); + cudf::jni::native_jlongArray handles(env, static_cast(column_view_ptrs.size())); + std::transform( + column_view_ptrs.begin(), + column_view_ptrs.end(), + handles.begin(), + [](std::unique_ptr& p) { return cudf::jni::release_as_jlong(p); }); + return handles.get_jArray(); + } + CATCH_STD(env, 0); +} + +JNIEXPORT void JNICALL Java_com_nvidia_spark_rapids_jni_HostTable_freeDeviceColumnView( + JNIEnv* env, jclass, jlong dev_column_view_handle) +{ + JNI_NULL_CHECK(env, dev_column_view_handle, "view is null", ); + try { + delete reinterpret_cast(dev_column_view_handle); + } + CATCH_STD(env, ); +} + +JNIEXPORT void JNICALL Java_com_nvidia_spark_rapids_jni_HostTable_freeHostTable(JNIEnv* env, + jclass, + jlong table_handle) +{ + JNI_NULL_CHECK(env, table_handle, "table is null", ); + try { + delete reinterpret_cast(table_handle); + } + CATCH_STD(env, ); +} + +} // extern "C" diff --git a/src/main/cpp/src/host_table_view.hpp b/src/main/cpp/src/host_table_view.hpp new file mode 100644 index 0000000000..f707b78976 --- /dev/null +++ 
b/src/main/cpp/src/host_table_view.hpp @@ -0,0 +1,320 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +namespace spark_rapids_jni { + +/** + * @brief A non-owning, immutable view of host data as a column of elements, + * some of which may be null as indicated by a bitmask. + * + * Unless otherwise noted, the memory layout of the `host_column_view`'s data and + * bitmask is expected to adhere to the Arrow Physical Memory Layout + * Specification: https://arrow.apache.org/docs/memory_layout.html + * + * Because `host_column_view` is non-owning, no host memory is allocated nor freed + * when `host_column_view` objects are created or destroyed. + */ +class host_column_view { + private: + cudf::data_type _type{cudf::type_id::EMPTY}; + cudf::size_type _size{}; + void const* _data{}; + cudf::bitmask_type const* _null_mask{}; + cudf::size_type _null_count{}; + std::vector _children{}; + + public: + host_column_view() = default; + ~host_column_view() = default; + host_column_view(host_column_view const&) = default; + host_column_view(host_column_view&&) = default; + host_column_view& operator=(host_column_view const&) = default; + host_column_view& operator=(host_column_view&&) = default; + + /** + * @brief Construct a `host_column_view` from pointers to host memory for the + * elements and bitmask of the column. 
+ */ + host_column_view(cudf::data_type type, + cudf::size_type size, + void const* data, + cudf::bitmask_type const* null_mask, + cudf::size_type null_count, + std::vector const& children = {}) + : _type{type}, + _size{size}, + _data{data}, + _null_mask{null_mask}, + _null_count{null_count}, + _children{children} + { + CUDF_EXPECTS(size >= 0, "Column size cannot be negative."); + if (type.id() == cudf::type_id::EMPTY) { + _null_count = size; + CUDF_EXPECTS(nullptr == data, "EMPTY column should have no data."); + CUDF_EXPECTS(nullptr == null_mask, "EMPTY column should have no null mask."); + } else if (cudf::is_compound(type)) { + if (type.id() != cudf::type_id::STRING) { + CUDF_EXPECTS(nullptr == data, "Compound (parent) columns cannot have data"); + } + } else if (size > 0) { + CUDF_EXPECTS(nullptr != data, "Null data pointer."); + } + if ((null_count > 0) and (type.id() != cudf::type_id::EMPTY)) { + CUDF_EXPECTS(nullptr != null_mask, "Invalid null mask for non-zero null count."); + } + if (type.id() == cudf::type_id::EMPTY) { + CUDF_EXPECTS(num_children() == 0, "EMPTY column cannot have children."); + } + } + + /** + * @brief Returns the number of elements in the column + * + * @return The number of elements in the column + */ + [[nodiscard]] cudf::size_type size() const noexcept { return _size; } + + /** + * @brief Returns the element `data_type` + * + * @return The `data_type` of the elements in the column + */ + [[nodiscard]] cudf::data_type type() const noexcept { return _type; } + + /** + * @brief Indicates if the column can contain null elements, i.e., if it has + * an allocated bitmask. + * + * @note If `null_count() > 0`, this function must always return `true`. 
+ * + * @return true The bitmask is allocated + * @return false The bitmask is not allocated + */ + [[nodiscard]] bool nullable() const noexcept { return nullptr != _null_mask; } + + /** + * @brief Returns the count of null elements + * + * @return The count of null elements + */ + [[nodiscard]] cudf::size_type null_count() const noexcept { return _null_count; } + + /** + * @brief Indicates if the column contains null elements, + * i.e., `null_count() > 0` + * + * @return true One or more elements are null + * @return false All elements are valid + */ + [[nodiscard]] bool has_nulls() const { return null_count() > 0; } + + /** + * @brief Returns raw pointer to the underlying bitmask allocation. + * + * @note If `null_count() == 0`, this may return `nullptr`. + * @return Raw pointer to the bitmask + */ + [[nodiscard]] cudf::bitmask_type const* null_mask() const noexcept { return _null_mask; } + + template + T const* data() const noexcept + { + return static_cast(_data); + } + + /** + * @brief Returns the specified child + * + * @param child_index The index of the desired child + * @return The requested child `column_view` + */ + [[nodiscard]] host_column_view const& child(cudf::size_type child_index) const + { + return _children.at(child_index); + } + + /** + * @brief Returns the number of child columns. + * + * @return The number of child columns + */ + [[nodiscard]] cudf::size_type num_children() const noexcept { return _children.size(); } + + /** + * @brief Returns iterator to the beginning of the ordered sequence of child column-views. + * + * @return An iterator to a `host_column_view` referencing the first child column + */ + auto child_begin() const noexcept { return _children.cbegin(); } + + /** + * @brief Returns iterator to the end of the ordered sequence of child column-views. 
+ * + * @return An iterator to a `host_column_view` one past the end of the child columns + */ + auto child_end() const noexcept { return _children.cend(); } + + /** + * @brief Returns the child column corresponding to the offsets of a strings column + * + * @note This must only be called on a strings column. + */ + [[nodiscard]] host_column_view const& strings_offsets() const + { + return _children.at(cudf::strings_column_view::offsets_column_index); + } + + /** + * @brief Returns the child column corresponding to the offsets of a lists column + * + * @note This must only be called on a lists column. + */ + [[nodiscard]] host_column_view const& lists_offsets() const + { + return _children.at(cudf::lists_column_view::offsets_column_index); + } + + /** + * @brief Returns the child column containing the data of a lists column + * + * @note This must only be called on a lists column. + */ + [[nodiscard]] host_column_view const& lists_child() const + { + return _children.at(cudf::lists_column_view::child_column_index); + } +}; + +/** + * @brief A set of host_column_view's of the same size. + */ +class host_table_view { + private: + std::vector _columns{}; + cudf::size_type _num_rows{}; + + public: + using iterator = decltype(std::begin(_columns)); ///< Iterator type for the table + using const_iterator = decltype(std::cbegin(_columns)); ///< const iterator type for the table + + host_table_view() = default; + ~host_table_view() = default; + host_table_view(host_table_view const&) = default; + host_table_view(host_table_view&&) = default; + host_table_view& operator=(host_table_view const&) = default; + host_table_view& operator=(host_table_view&&) = default; + + /** + * @brief Construct from a vector of column views + * + * @note Because a `std::vector` is constructible from a + * `std::initializer_list`, this constructor also supports the following + * usage: + * ``` + * host_column_view c0, c1, c2; + * ... 
+ * host_table_view t{{c0,c1,c2}}; // Creates a `host_table_view` from c0, c1, c2 + * ``` + * + * @throws cudf::logic_error If all views do not have the same size + * + * @param cols The vector of column views to construct the table from + */ + explicit host_table_view(std::vector const& cols) : _columns{cols} + { + if (num_columns() > 0) { + std::for_each(_columns.begin(), _columns.end(), [this](host_column_view const& col) { + CUDF_EXPECTS(col.size() == _columns.front().size(), "Column size mismatch."); + }); + _num_rows = _columns.front().size(); + } else { + _num_rows = 0; + } + } + + /** + * @brief Returns an iterator to the first view in the table. + * + * @return An iterator to the first host_column_view + */ + iterator begin() noexcept { return std::begin(_columns); } + + /** + * @brief Returns an iterator to the first view in the table. + * + * @return An iterator to the first host_column_view + */ + [[nodiscard]] const_iterator begin() const noexcept { return std::begin(_columns); } + + /** + * @brief Returns an iterator one past the last column view in the table. + * + * `end()` acts as a place holder. Attempting to dereference it results in + * undefined behavior. + * + * @return An iterator to one past the last column view in the table + */ + iterator end() noexcept { return std::end(_columns); } + + /** + * @brief Returns an iterator one past the last column view in the table. + * + * `end()` acts as a place holder. Attempting to dereference it results in + * undefined behavior. 
+ * + * @return An iterator to one past the last column view in the table + */ + [[nodiscard]] const_iterator end() const noexcept { return std::end(_columns); } + + /** + * @brief Returns a reference to the view of the specified column + * + * @throws std::out_of_range + * If `column_index` is out of the range [0, num_columns) + * + * @param column_index The index of the desired column + * @return A reference to the desired column + */ + [[nodiscard]] host_column_view const& column(cudf::size_type column_index) const + { + return _columns.at(column_index); + } + + /** + * @brief Returns the number of columns + * + * @return The number of columns + */ + [[nodiscard]] cudf::size_type num_columns() const noexcept { return _columns.size(); } + + /** + * @brief Returns the number of rows + * + * @return The number of rows + */ + [[nodiscard]] cudf::size_type num_rows() const noexcept { return _num_rows; } +}; + +} // namespace spark_rapids_jni diff --git a/src/main/java/com/nvidia/spark/rapids/jni/HostTable.java b/src/main/java/com/nvidia/spark/rapids/jni/HostTable.java new file mode 100644 index 0000000000..754412d727 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/HostTable.java @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.jni; + +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.Cuda; +import ai.rapids.cudf.DeviceMemoryBuffer; +import ai.rapids.cudf.HostMemoryBuffer; +import ai.rapids.cudf.NativeDepsLoader; +import ai.rapids.cudf.Table; + +/** + * Represents a cudf Table but in host memory instead of device memory. + * Table is tracked in native code as a host_table_view. + */ +public class HostTable implements AutoCloseable { + + static { + NativeDepsLoader.loadNativeDeps(); + } + + private long nativeTableView; + private HostMemoryBuffer hostBuffer; + + /** + * Copies a device table to a host table asynchronously. + * NOTE: The caller must synchronize on the stream before examining the data on the host. + * @param table device table to copy + * @param stream stream to use for the copy + * @return host table + */ + public static HostTable fromTableAsync(Table table, Cuda.Stream stream) { + long size = bufferSize(table.getNativeView(), stream.getStream()); + long tableHandle = 0; + HostMemoryBuffer hostBuffer = HostMemoryBuffer.allocate(size); + try { + tableHandle = copyFromTableAsync(table.getNativeView(), + hostBuffer.getAddress(), hostBuffer.getLength(), stream.getStream()); + } catch (Throwable t) { + try { + hostBuffer.close(); + } catch (Throwable t2) { + t.addSuppressed(t2); + } + throw t; + } + return new HostTable(tableHandle, hostBuffer); + } + + /** + * Copies a device table to a host table synchronously. + * @param table device table to copy + * @param stream stream to use for the copy + * @return host table + */ + public static HostTable fromTable(Table table, Cuda.Stream stream) { + HostTable hostTable = fromTableAsync(table, stream); + stream.sync(); + return hostTable; + } + + /** + * Copies a device table to a host table synchronously on the default stream. 
+ * @param table device table to copy + * @return host table + */ + public static HostTable fromTable(Table table) { + return fromTable(table, Cuda.DEFAULT_STREAM); + } + + private HostTable(long tableHandle, HostMemoryBuffer hostBuffer) { + this.nativeTableView = tableHandle; + this.hostBuffer = hostBuffer; + } + + /** + * Gets the address of the host_table_view for this host table. + * NOTE: This is only valid as long as the HostTable instance is valid. + */ + public long getNativeTableView() { + return nativeTableView; + } + + /** + * Gets the host memory buffer containing the data for this host table. + */ + public HostMemoryBuffer getHostBuffer() { + return hostBuffer; + } + + /** + * Copies the host table to a device table asynchronously. + * NOTE: The caller must synchronize on the stream before closing this instance, + * or the copy could still be in-flight when the host memory is invalidated or reused. + * @param stream stream to use for the copy + * @return device table + */ + public Table toTableAsync(Cuda.Stream stream) { + long size = hostBuffer.getLength(); + Table table = null; + try (DeviceMemoryBuffer devBuffer = DeviceMemoryBuffer.allocate(size, stream)) { + devBuffer.copyFromHostBufferAsync(hostBuffer, stream); + long hostToDevPtrOffset = devBuffer.getAddress() - hostBuffer.getAddress(); + long[] columnViewHandles = toDeviceColumnViews(nativeTableView, hostToDevPtrOffset); + ColumnVector[] columns = new ColumnVector[columnViewHandles.length]; + boolean done = false; + try { + for (int i = 0; i < columnViewHandles.length; i++) { + columns[i] = ColumnVector.fromViewWithContiguousAllocation(columnViewHandles[i], devBuffer); + columnViewHandles[i] = 0; + } + table = new Table(columns); + // Need to synchronize before returning to ensure host copy completed, otherwise caller may + // free and reuse the host buffer before device copy completes. 
+ stream.sync(); + done = true; + } finally { + // always close columns because Table incremented refcounts + for (ColumnVector c : columns) { + if (c != null) { + c.close(); + } + } + if (!done) { + for (long viewHandle : columnViewHandles) { + if (viewHandle != 0) { + freeDeviceColumnView(viewHandle); + } + } + } + } + } + return table; + } + + /** + * Copies the host table to a device table synchronously. + * @param stream stream to use for the copy + * @return device table + */ + public Table toTable(Cuda.Stream stream) { + Table table = toTableAsync(stream); + stream.sync(); + return table; + } + + /** + * Copies the host table to a device table synchronously on the default stream. + * @return device table + */ + public Table toTable() { + return toTable(Cuda.DEFAULT_STREAM); + } + + @Override + public void close() { + try { + freeHostTable(nativeTableView); + } finally { + nativeTableView = 0; + hostBuffer.close(); + hostBuffer = null; + } + } + + private static native long bufferSize(long tableHandle, long stream); + + private static native long copyFromTableAsync(long tableHandle, long hostAddress, long hostSize, + long stream); + + private static native long[] toDeviceColumnViews(long tableHandle, long hostToDevPtrOffset); + + private static native void freeDeviceColumnView(long columnHandle); + + private static native void freeHostTable(long tableHandle); +} diff --git a/src/test/java/com/nvidia/spark/rapids/jni/HostTableTest.java b/src/test/java/com/nvidia/spark/rapids/jni/HostTableTest.java new file mode 100644 index 0000000000..0064dee1f5 --- /dev/null +++ b/src/test/java/com/nvidia/spark/rapids/jni/HostTableTest.java @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import ai.rapids.cudf.AssertUtils; +import ai.rapids.cudf.Cuda; +import ai.rapids.cudf.DType; +import ai.rapids.cudf.ColumnVector; +import ai.rapids.cudf.Table; +import ai.rapids.cudf.HostColumnVector.BasicType; +import ai.rapids.cudf.HostColumnVector.DataType; +import ai.rapids.cudf.HostColumnVector.ListType; +import ai.rapids.cudf.HostColumnVector.StructData; +import ai.rapids.cudf.HostColumnVector.StructType; +import org.junit.jupiter.api.Test; + +public class HostTableTest { + @Test + public void testRoundTripSync() { + try (Table expected = buildTable()) { + try (HostTable ht = HostTable.fromTable(expected, Cuda.DEFAULT_STREAM)) { + Table actual = ht.toTable(Cuda.DEFAULT_STREAM); + AssertUtils.assertTablesAreEqual(expected, actual); + } + } + } + + @Test + public void testRoundTripSyncDefault() { + try (Table expected = buildTable()) { + try (HostTable ht = HostTable.fromTable(expected)) { + Table actual = ht.toTable(); + AssertUtils.assertTablesAreEqual(expected, actual); + } + } + } + + @Test + public void testRoundTripAsync() { + testRoundTripAsync(buildTable()); + } + + @Test + public void testRoundTripAsyncEmpty() { + testRoundTripAsync(buildEmptyTable()); + } + + private void testRoundTripAsync(Table expected) { + try (Table t = expected) { + try (HostTable ht = HostTable.fromTableAsync(t, Cuda.DEFAULT_STREAM)) { + Table actual = ht.toTableAsync(Cuda.DEFAULT_STREAM); + AssertUtils.assertTablesAreEqual(expected, actual); + } + } + } + + private Table buildEmptyTable() { + DataType 
listStringsType = new ListType(true, new BasicType(true, DType.STRING)); + DataType mapType = new ListType(true, + new StructType(true, + new BasicType(false, DType.STRING), + new BasicType(false, DType.STRING))); + DataType structType = new StructType(true, + new BasicType(true, DType.INT8), + new BasicType(false, DType.FLOAT32)); + try (ColumnVector emptyInt = ColumnVector.fromInts(); + ColumnVector emptyDouble = ColumnVector.fromDoubles(); + ColumnVector emptyString = ColumnVector.fromStrings(); + ColumnVector emptyListString = ColumnVector.fromLists(listStringsType); + ColumnVector emptyMap = ColumnVector.fromLists(mapType); + ColumnVector emptyStruct = ColumnVector.fromStructs(structType)) { + return new Table(emptyInt, emptyInt, emptyDouble, emptyString, + emptyListString, emptyMap, emptyStruct); + } + } + + private Table buildTable() { + StructType mapStructType = new StructType(true, + new BasicType(false, DType.STRING), + new BasicType(false, DType.STRING)); + StructType structType = new StructType(true, + new BasicType(true, DType.INT32), + new BasicType(false, DType.FLOAT32)); + return new Table.TestBuilder() + .column( 100, 202, 3003, 40004, 5, -60, 1, null, 3, null, 5, null, 7, null, 9, null, 11, null, 13, null, 15) + .column( true, true, false, false, true, null, true, true, null, false, false, null, true, true, null, false, false, null, true, true, null) + .column( (byte)1, (byte)2, null, (byte)4, (byte)5, (byte)6, (byte)1, (byte)2, (byte)3, null, (byte)5, (byte)6, (byte)7, null, (byte)9, (byte)10, (byte)11, null, (byte)13, (byte)14, (byte)15) + .column((short)6, (short)5, (short)4, null, (short)2, (short)1, (short)1, (short)2, (short)3, null, (short)5, (short)6, (short)7, null, (short)9, (short)10, null, (short)12, (short)13, (short)14, null) + .column( 1L, null, 1001L, 50L, -2000L, null, 1L, 2L, 3L, 4L, null, 6L, 7L, 8L, 9L, null, 11L, 12L, 13L, 14L, null) + .column( 10.1f, 20f, Float.NaN, 3.1415f, -60f, null, 1f, 2f, 3f, 4f, 5f, null, 7f, 8f, 9f, 
10f, 11f, null, 13f, 14f, 15f) + .column( 10.1f, 20f, Float.NaN, 3.1415f, -60f, -50f, 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f, 9f, 10f, 11f, 12f, 13f, 14f, 15f) + .column( 10.1, 20.0, 33.1, 3.1415, -60.5, null, 1., 2., 3., 4., 5., 6., null, 8., 9., 10., 11., 12., null, 14., 15.) + .timestampDayColumn(99, 100, 101, 102, 103, 104, 1, 2, 3, 4, 5, 6, 7, null, 9, 10, 11, 12, 13, null, 15) + .timestampMillisecondsColumn(9L, 1006L, 101L, 5092L, null, 88L, 1L, 2L, 3L, 4L, 5L ,6L, 7L, 8L, null, 10L, 11L, 12L, 13L, 14L, 15L) + .timestampSecondsColumn(1L, null, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L ,6L, 7L, 8L, 9L, null, 11L, 12L, 13L, 14L, 15L) + .decimal32Column(-3, 100, 202, 3003, 40004, 5, -60, 1, null, 3, null, 5, null, 7, null, 9, null, 11, null, 13, null, 15) + .decimal64Column(-8, 1L, null, 1001L, 50L, -2000L, null, 1L, 2L, 3L, 4L, null, 6L, 7L, 8L, 9L, null, 11L, 12L, 13L, 14L, null) + .column( "A", "B", "C", "D", null, "TESTING", "1", "2", "3", "4", "5", "6", "7", null, "9", "10", "11", "12", "13", null, "15") + .column( + strings("1", "2", "3"), strings("4"), strings("5"), strings("6, 7"), + strings("", "9", null), strings("11"), strings(""), strings(null, null), + strings("15", null), null, null, strings("18", "19", "20"), + null, strings("22"), strings("23", ""), null, + null, null, null, strings(), + strings("the end")) + .column(mapStructType, + structs(struct("1", "2")), structs(struct("3", "4")), + null, null, + structs(struct("key", "value"), struct("a", "b")), null, + null, structs(struct("3", "4"), struct("1", "2")), + structs(), structs(null, struct("foo", "bar")), + structs(null, null, null), null, + null, null, + null, null, + null, null, + null, null, + structs(struct("the", "end"))) + .column(structType, + struct(1, 1f), null, struct(2, 3f), null, struct(8, 7f), + struct(0, 0f), null, null, struct(-1, -1f), struct(-100, -100f), + struct(Integer.MAX_VALUE, Float.MAX_VALUE), null, null, null, null, + null, null, null, null, null, + struct(Integer.MIN_VALUE, 
Float.MIN_VALUE)) + .column( "A", "A", "C", "C", null, "TESTING", "1", "2", "3", "4", "5", "6", "7", null, "9", "10", "11", "12", "13", null, "15") + .build(); + } + + private static StructData struct(Object... values) { + return new StructData(values); + } + + private static StructData[] structs(StructData... values) { + return values; + } + + private static String[] strings(String... values) { + return values; + } +} From 7623334e7aeb6ff386e6f9e74767c76914f0e998 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 4 Oct 2024 05:45:22 +0800 Subject: [PATCH 028/157] Update submodule cudf to bd3b3327a6326ffea4658d682b8b9087e32da98a (#2466) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bac81cb8f4..bd3b3327a6 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bac81cb8f4c61c9a81e30e79d03c323406bf657a +Subproject commit bd3b3327a6326ffea4658d682b8b9087e32da98a diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index bd72ae2fe6..98d5689c1d 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "b2592d493ec00b799c93a88b3328d07ce2be390d", + "git_tag" : "473e537a59b87059254187b67de6991b9e35379e", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "9e410c0591f38aa6c0a17c4e2c2edc4f6bfed058", + "git_tag" : "6489bb7df63a3784b4a94067e3a8fa8917523ab7", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From bf75ec61925704ae448c25f8fb723060c5fc87b3 Mon Sep 17 00:00:00 2001 From: Jenkins Automation 
<70000568+nvauto@users.noreply.github.com> Date: Fri, 4 Oct 2024 10:33:28 +0800 Subject: [PATCH 029/157] Update submodule cudf to 010839172ecb5a99609044a98031ff5b7578cd64 (#2467) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bd3b3327a6..010839172e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bd3b3327a6326ffea4658d682b8b9087e32da98a +Subproject commit 010839172ecb5a99609044a98031ff5b7578cd64 From ec8b4d5b5a702b0bad680dc05c21b51dac5701e2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 5 Oct 2024 05:19:05 +0800 Subject: [PATCH 030/157] Update submodule cudf to a8da1ff2b393abbafa27dddcf4c19481ec853c28 (#2469) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 010839172e..a8da1ff2b3 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 010839172ecb5a99609044a98031ff5b7578cd64 +Subproject commit a8da1ff2b393abbafa27dddcf4c19481ec853c28 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 98d5689c1d..f6b30a8ff5 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "6489bb7df63a3784b4a94067e3a8fa8917523ab7", + "git_tag" : "815003232d90a45fe6867214e73284649c639066", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 49e5f502f2c52f73c42bb8d661f2de5a7d726a77 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 5 Oct 2024 10:31:13 +0800 Subject: [PATCH 031/157] Update 
submodule cudf to 33b8dfa42ff9a600adfa6d10c7740169a0340338 (#2470) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index a8da1ff2b3..33b8dfa42f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit a8da1ff2b393abbafa27dddcf4c19481ec853c28 +Subproject commit 33b8dfa42ff9a600adfa6d10c7740169a0340338 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index f6b30a8ff5..7f4655f776 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -141,7 +141,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "815003232d90a45fe6867214e73284649c639066", + "git_tag" : "c494395e58288cac16321ce90e9b15f3508ae89a", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 1ac710512b61e7a87e9d3d27c2be50982257a9c5 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 5 Oct 2024 21:17:18 +0800 Subject: [PATCH 032/157] Update submodule cudf to fcff2b6ef7d6db62fc064ad10ffc6c873fc85b58 (#2472) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 33b8dfa42f..fcff2b6ef7 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 33b8dfa42ff9a600adfa6d10c7740169a0340338 +Subproject commit fcff2b6ef7d6db62fc064ad10ffc6c873fc85b58 From cd2484089da41d9f9c1350fbca39385b17efb4b2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 8 Oct 2024 05:25:49 +0800 Subject: [PATCH 033/157] Update submodule cudf to f926a61c7d31b7b33c3a3482507e9efb44b2cc36 (#2473) Signed-off-by: spark-rapids automation 
<70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index fcff2b6ef7..f926a61c7d 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit fcff2b6ef7d6db62fc064ad10ffc6c873fc85b58 +Subproject commit f926a61c7d31b7b33c3a3482507e9efb44b2cc36 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 6936d8fd01..e56b65654c 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -2413be67c97acf57a651481467815036570d11ee +7879f97a0f78e8f6a34dc5e592cf3e5f69d7ae84 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 7f4655f776..07e6d47395 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "473e537a59b87059254187b67de6991b9e35379e", + "git_tag" : "e64c3635e89e6792b169cdf657339f34921a603d", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 7357481f1fe3d4bed76d50c1744390bc1710964d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 8 Oct 2024 10:50:28 +0800 Subject: [PATCH 034/157] Update submodule cudf to 09ed2105b841fe29be75af8b0d5a41fc09e7b6ac (#2474) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index f926a61c7d..09ed2105b8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit f926a61c7d31b7b33c3a3482507e9efb44b2cc36 +Subproject commit 09ed2105b841fe29be75af8b0d5a41fc09e7b6ac diff --git 
a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 07e6d47395..1b281d590c 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -109,6 +109,14 @@ "git_shallow" : false, "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "patches" : + [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "fixed_in" : "", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537" + } + ], "version" : "0.6.0.dev" }, "nvcomp" : From c81d392e4239f60e5de9955d5d21f16d8c9b8b08 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 8 Oct 2024 13:41:50 -0500 Subject: [PATCH 035/157] Disable kvikio remote I/O to avoid openssl dependencies (#2476) Signed-off-by: Jason Lowe --- pom.xml | 1 + thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index ea72dbd5f5..d8efe69dc9 100644 --- a/pom.xml +++ b/pom.xml @@ -425,6 +425,7 @@ + diff --git a/thirdparty/cudf b/thirdparty/cudf index 09ed2105b8..bcf9425a8f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 09ed2105b841fe29be75af8b0d5a41fc09e7b6ac +Subproject commit bcf9425a8fc8bfe4a08840749a16cf83e1bc89e8 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 1b281d590c..7058cd288f 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "e64c3635e89e6792b169cdf657339f34921a603d", + "git_tag" : "dc536af29ccc60938b82fd8d3bd780873fcc8997", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From efa3ba5ebf56b22adf223a70277eb8f0eb0de7fa Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Oct 2024 05:21:55 +0800 Subject: [PATCH 036/157] Update 
submodule cudf to 553d8ec197c45f7d10ae4571f625e97d7b88be82 (#2477) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bcf9425a8f..553d8ec197 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bcf9425a8fc8bfe4a08840749a16cf83e1bc89e8 +Subproject commit 553d8ec197c45f7d10ae4571f625e97d7b88be82 From c5706b050b6d78e4ddbf9541433eca9d28131328 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:31:05 +0800 Subject: [PATCH 037/157] Update submodule cudf to ded4dd2acbf2c5933765853eab56f4d37599c909 (#2478) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 553d8ec197..ded4dd2acb 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 553d8ec197c45f7d10ae4571f625e97d7b88be82 +Subproject commit ded4dd2acbf2c5933765853eab56f4d37599c909 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index e56b65654c..b593754ae0 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -7879f97a0f78e8f6a34dc5e592cf3e5f69d7ae84 +76b55a0f801dc961c0315bb4c219793212e533fd diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 7058cd288f..63724fe7e8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c", + "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", "git_url" : 
"https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, From f9ce439f1773a80d4a9fc246664146a4e9560038 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:23:05 +0800 Subject: [PATCH 038/157] Update submodule cudf to bfac5e5d9b2c10718d2f0f925b4f2c9f62d8fea1 (#2479) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ded4dd2acb..bfac5e5d9b 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ded4dd2acbf2c5933765853eab56f4d37599c909 +Subproject commit bfac5e5d9b2c10718d2f0f925b4f2c9f62d8fea1 From 1a4558af327b763c4d274cf81810c3449aa89bf6 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 9 Oct 2024 17:08:20 +0000 Subject: [PATCH 039/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 18 +++++++++++++----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 319a53327a..bfac5e5d9b 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 319a53327ac7c921a78979a1f23c5caf7171129d +Subproject commit bfac5e5d9b2c10718d2f0f925b4f2c9f62d8fea1 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 37820d8ad4..b593754ae0 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -312909127cf0fe96e178f0ffa754908f58d489a3 +76b55a0f801dc961c0315bb4c219793212e533fd diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index a3458a55a8..63724fe7e8 100644 --- a/thirdparty/cudf-pins/versions.json 
+++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f", + "git_tag" : "dc536af29ccc60938b82fd8d3bd780873fcc8997", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c", + "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", "git_url" : "https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, @@ -109,6 +109,14 @@ "git_shallow" : false, "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "patches" : + [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "fixed_in" : "", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537" + } + ], "version" : "0.6.0.dev" }, "nvcomp" : @@ -141,9 +149,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e", + "git_tag" : "c494395e58288cac16321ce90e9b15f3508ae89a", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 4a7055d3c78dcbea7af06556d3d340d7536f9e8b Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 10 Oct 2024 04:31:12 +0800 Subject: [PATCH 040/157] Update submodule cudf to dfdae599622841bf3f4d523c01eee3ae1fe933f0 (#2485) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bfac5e5d9b..dfdae59962 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
bfac5e5d9b2c10718d2f0f925b4f2c9f62d8fea1 +Subproject commit dfdae599622841bf3f4d523c01eee3ae1fe933f0 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index b593754ae0..5d73ae95b4 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -76b55a0f801dc961c0315bb4c219793212e533fd +fe3362fa774195a3d434d6835416672e1d46555e diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 63724fe7e8..fec9fd78bf 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "dc536af29ccc60938b82fd8d3bd780873fcc8997", + "git_tag" : "adc3a8ef11dd0053f77a908db9db54fe7c92d5ff", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "c494395e58288cac16321ce90e9b15f3508ae89a", + "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 2d67496c80c00f4228c2f0a20c262131d5375657 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:19:22 +0800 Subject: [PATCH 041/157] Update submodule cudf to 31423d056c45bd6352f0c611ed5e63423b09b954 (#2486) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index dfdae59962..31423d056c 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit dfdae599622841bf3f4d523c01eee3ae1fe933f0 +Subproject commit 31423d056c45bd6352f0c611ed5e63423b09b954 From 6e9548ef12c63e87437ace917cb83fee48b34aa7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 10 Oct 2024 22:39:06 
+0800 Subject: [PATCH 042/157] Update submodule cudf to 7173b52fce25937bb69e22a083a5de4655078fa1 (#2487) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 31423d056c..7173b52fce 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 31423d056c45bd6352f0c611ed5e63423b09b954 +Subproject commit 7173b52fce25937bb69e22a083a5de4655078fa1 From 811103d781b78a717b0af8e6fcd4363f7d348080 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Oct 2024 05:24:35 +0800 Subject: [PATCH 043/157] Update submodule cudf to 69b0f661ff2fc4c12bb0fe696e556f6b3224b381 (#2488) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7173b52fce..69b0f661ff 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7173b52fce25937bb69e22a083a5de4655078fa1 +Subproject commit 69b0f661ff2fc4c12bb0fe696e556f6b3224b381 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index fec9fd78bf..a93318b4a8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "adc3a8ef11dd0053f77a908db9db54fe7c92d5ff", + "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 10fbcff0790b25cb53da30088a2a0b72da10de63 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Thu, 10 Oct 2024 15:37:47 -0700 Subject: [PATCH 044/157] Implement `concat_json` to join JSON strings given by strings column (#2457) * add 
concat_jsons * Fix compile error Signed-off-by: Nghia Truong * Optimize stream sync Signed-off-by: Nghia Truong * Fix interface Signed-off-by: Nghia Truong * Add JNI binding Signed-off-by: Nghia Truong * Change delimiter Java type Signed-off-by: Nghia Truong * Fix null mask Signed-off-by: Nghia Truong * Return `is_valid` column Signed-off-by: Nghia Truong * Separate input and output validity Signed-off-by: Nghia Truong * Make changes in cudf's Java code * Change `delimiter` type from `byte` to `char`, and rewrite docs Signed-off-by: Nghia Truong * Restore source file Signed-off-by: Nghia Truong * Rename file Signed-off-by: Nghia Truong * Add new source file Signed-off-by: Nghia Truong * Add file to cmake Signed-off-by: Nghia Truong * Optimize implementation Signed-off-by: Nghia Truong * Fix start character Signed-off-by: Nghia Truong * Check for white space characters that are not just space character Signed-off-by: Nghia Truong * Check for delimiter if the character is acceptable Signed-off-by: Nghia Truong * Change `not_whitespace` Signed-off-by: Nghia Truong * Optimize searching for delimiter in just one kernel call Signed-off-by: Nghia Truong * Use existence map instead of histogram Signed-off-by: Nghia Truong * Remove utf8 processing code Signed-off-by: Nghia Truong * Fix `num_values` Signed-off-by: Nghia Truong * Search only for 128 characters Signed-off-by: Nghia Truong * Fix `is_null_or_empty` Signed-off-by: Nghia Truong * Change back to use `cub::DeviceHistogram::HistogramEven` * Implement `JSONUtils.makeStructs` Signed-off-by: Nghia Truong * Rename variables and update docs Signed-off-by: Nghia Truong * Misc Signed-off-by: Nghia Truong * Add stream sync and extract code into separate functions for profiling Signed-off-by: Nghia Truong * Revert "Add stream sync and extract code into separate functions for profiling" This reverts commit a048801977eb4a5bd006db4b7c62c30acc44b232. 
* Reorganize code Signed-off-by: Nghia Truong * Revert "Reorganize code" This reverts commit f10e73a26dcd5daba827369e89fd207db72241db. * Misc * Use one warp per row to improve performance Signed-off-by: Nghia Truong * Optimize write Signed-off-by: Nghia Truong * Revert "Optimize write" This reverts commit 9af88ca43f62eaf55279e03d62a0ce1567584b60. * Reorganize code Signed-off-by: Nghia Truong --------- Signed-off-by: Nghia Truong Co-authored-by: Karthikeyan Natarajan --- src/main/cpp/CMakeLists.txt | 1 + src/main/cpp/src/JSONUtilsJni.cpp | 46 ++- src/main/cpp/src/json_utils.cu | 275 ++++++++++++++++++ .../cpp/src/{from_json.hpp => json_utils.hpp} | 12 + .../nvidia/spark/rapids/jni/JSONUtils.java | 63 ++++ 5 files changed, 396 insertions(+), 1 deletion(-) create mode 100644 src/main/cpp/src/json_utils.cu rename src/main/cpp/src/{from_json.hpp => json_utils.hpp} (65%) diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt index 1ce648db3d..14edae0ec0 100644 --- a/src/main/cpp/CMakeLists.txt +++ b/src/main/cpp/CMakeLists.txt @@ -215,6 +215,7 @@ add_library( src/from_json_to_raw_map.cu src/get_json_object.cu src/histogram.cu + src/json_utils.cu src/murmur_hash.cu src/parse_uri.cu src/regex_rewrite_utils.cu diff --git a/src/main/cpp/src/JSONUtilsJni.cpp b/src/main/cpp/src/JSONUtilsJni.cpp index 5a0c5dd341..67758e8595 100644 --- a/src/main/cpp/src/JSONUtilsJni.cpp +++ b/src/main/cpp/src/JSONUtilsJni.cpp @@ -15,8 +15,8 @@ */ #include "cudf_jni_apis.hpp" -#include "from_json.hpp" #include "get_json_object.hpp" +#include "json_utils.hpp" #include @@ -154,4 +154,48 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_JSONUtils_extractRawMap } CATCH_STD(env, 0); } + +JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_JSONUtils_concatenateJsonStrings( + JNIEnv* env, jclass, jlong j_input) +{ + JNI_NULL_CHECK(env, j_input, "j_input is null", 0); + + try { + cudf::jni::auto_set_device(env); + auto const input_cv = reinterpret_cast(j_input); + 
auto [is_valid, joined_strings, delimiter] = + spark_rapids_jni::concat_json(cudf::strings_column_view{*input_cv}); + + // The output array contains 5 elements: + // [0]: address of the cudf::column object `is_valid` in host memory + // [1]: address of data buffer of the concatenated strings in device memory + // [2]: data length + // [3]: address of the rmm::device_buffer object (of the concatenated strings) in host memory + // [4]: delimiter char + auto out_handles = cudf::jni::native_jlongArray(env, 5); + out_handles[0] = reinterpret_cast(is_valid.release()); + out_handles[1] = reinterpret_cast(joined_strings->data()); + out_handles[2] = static_cast(joined_strings->size()); + out_handles[3] = reinterpret_cast(joined_strings.release()); + out_handles[4] = static_cast(delimiter); + return out_handles.get_jArray(); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_JSONUtils_makeStructs( + JNIEnv* env, jclass, jlongArray j_children, jlong j_is_null) +{ + JNI_NULL_CHECK(env, j_children, "j_children is null", 0); + JNI_NULL_CHECK(env, j_is_null, "j_is_null is null", 0); + + try { + cudf::jni::auto_set_device(env); + auto const children = + cudf::jni::native_jpointerArray{env, j_children}.get_dereferenced(); + auto const is_null = *reinterpret_cast(j_is_null); + return cudf::jni::ptr_as_jlong(spark_rapids_jni::make_structs(children, is_null).release()); + } + CATCH_STD(env, 0); +} } diff --git a/src/main/cpp/src/json_utils.cu b/src/main/cpp/src/json_utils.cu new file mode 100644 index 0000000000..85b2dc9301 --- /dev/null +++ b/src/main/cpp/src/json_utils.cu @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace spark_rapids_jni { + +namespace detail { + +namespace { + +constexpr bool not_whitespace(cudf::char_utf8 ch) +{ + return ch != ' ' && ch != '\r' && ch != '\n' && ch != '\t'; +} + +constexpr bool can_be_delimiter(char c) +{ + // The character list below is from `json_reader_options.set_delimiter`. + switch (c) { + case '{': + case '[': + case '}': + case ']': + case ',': + case ':': + case '"': + case '\'': + case '\\': + case ' ': + case '\t': + case '\r': return false; + default: return true; + } +} + +} // namespace + +std::tuple, std::unique_ptr, char> concat_json( + cudf::strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto const d_input_ptr = cudf::column_device_view::create(input.parent(), stream); + auto const default_mr = rmm::mr::get_current_device_resource(); + + // Check if the input rows are either null, equal to `null` string literal, or empty. + // This will be used for masking out the input when doing string concatenation. + rmm::device_uvector is_valid_input(input.size(), stream, default_mr); + + // Check if the input rows are either null or empty. + // This will be returned to the caller. 
+ rmm::device_uvector is_null_or_empty(input.size(), stream, mr); + + thrust::for_each( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0L), + thrust::make_counting_iterator(input.size() * static_cast(cudf::detail::warp_size)), + [input = *d_input_ptr, + output = thrust::make_zip_iterator(thrust::make_tuple( + is_valid_input.begin(), is_null_or_empty.begin()))] __device__(int64_t tidx) { + // Execute one warp per row to minimize thread divergence. + if ((tidx % cudf::detail::warp_size) != 0) { return; } + auto const idx = tidx / cudf::detail::warp_size; + + if (input.is_null(idx)) { + output[idx] = thrust::make_tuple(false, true); + return; + } + + auto const d_str = input.element(idx); + auto const size = d_str.size_bytes(); + int i = 0; + char ch; + + // Skip the very first whitespace characters. + for (; i < size; ++i) { + ch = d_str[i]; + if (not_whitespace(ch)) { break; } + } + + if (i + 3 < size && + (d_str[i] == 'n' && d_str[i + 1] == 'u' && d_str[i + 2] == 'l' && d_str[i + 3] == 'l')) { + i += 4; + + // Skip the very last whitespace characters. + bool is_null_literal{true}; + for (; i < size; ++i) { + ch = d_str[i]; + if (not_whitespace(ch)) { + is_null_literal = false; + break; + } + } + + // The current row contains only `null` string literal and not any other non-whitespace + // characters. Such rows need to be masked out as null when doing concatenation. + if (is_null_literal) { + output[idx] = thrust::make_tuple(false, false); + return; + } + } + + auto const not_eol = i < size; + + // If the current row is not null or empty, it should start with `{`. Otherwise, we need to + // replace it by a null. This is necessary for libcudf's JSON reader to work. + // Note that if we want to support ARRAY schema, we need to check for `[` instead. 
+ auto constexpr start_character = '{'; + if (not_eol && ch != start_character) { + output[idx] = thrust::make_tuple(false, false); + return; + } + + output[idx] = thrust::make_tuple(not_eol, !not_eol); + }); + + auto constexpr num_levels = 256; + auto constexpr lower_level = std::numeric_limits::min(); + auto constexpr upper_level = std::numeric_limits::max(); + auto const num_chars = input.chars_size(stream); + + rmm::device_uvector histogram(num_levels, stream, default_mr); + thrust::uninitialized_fill( + rmm::exec_policy_nosync(stream), histogram.begin(), histogram.end(), 0); + + size_t temp_storage_bytes = 0; + cub::DeviceHistogram::HistogramEven(nullptr, + temp_storage_bytes, + input.chars_begin(stream), + histogram.begin(), + num_levels, + lower_level, + upper_level, + num_chars, + stream.value()); + rmm::device_buffer d_temp(temp_storage_bytes, stream); + cub::DeviceHistogram::HistogramEven(d_temp.data(), + temp_storage_bytes, + input.chars_begin(stream), + histogram.begin(), + num_levels, + lower_level, + upper_level, + num_chars, + stream.value()); + + auto const it = thrust::make_counting_iterator(0); + auto const zero_level_idx = -lower_level; // the bin storing count for character `\0` + auto const zero_level_it = it + zero_level_idx; + auto const end = it + num_levels; + + auto const first_zero_count_pos = + thrust::find_if(rmm::exec_policy_nosync(stream), + zero_level_it, // ignore the negative characters + end, + [zero_level_idx, counts = histogram.begin()] __device__(auto idx) -> bool { + auto const count = counts[idx]; + if (count > 0) { return false; } + auto const first_non_existing_char = static_cast(idx - zero_level_idx); + return can_be_delimiter(first_non_existing_char); + }); + + // This should never happen since the input should never cover the entire char range. 
+ if (first_zero_count_pos == end) { + throw std::logic_error( + "Cannot find any character suitable as delimiter during joining json strings."); + } + auto const delimiter = static_cast(thrust::distance(zero_level_it, first_zero_count_pos)); + + auto [null_mask, null_count] = cudf::detail::valid_if( + is_valid_input.begin(), is_valid_input.end(), thrust::identity{}, stream, default_mr); + // If the null count doesn't change, that mean we do not have any rows containing `null` string + // literal or empty rows. In such cases, just use the input column for concatenation. + auto const input_applied_null = + null_count == input.null_count() + ? cudf::column_view{} + : cudf::column_view{cudf::data_type{cudf::type_id::STRING}, + input.size(), + input.chars_begin(stream), + reinterpret_cast(null_mask.data()), + null_count, + 0, + std::vector{input.offsets()}}; + + auto concat_strings = cudf::strings::detail::join_strings( + null_count == input.null_count() ? input : cudf::strings_column_view{input_applied_null}, + cudf::string_scalar(std::string(1, delimiter), true, stream, default_mr), + cudf::string_scalar("{}", true, stream, default_mr), + stream, + mr); + + return {std::make_unique(std::move(is_null_or_empty), rmm::device_buffer{}, 0), + std::move(concat_strings->release().data), + delimiter}; +} + +std::unique_ptr make_structs(std::vector const& children, + cudf::column_view const& is_null, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + if (children.size() == 0) { return nullptr; } + + auto const row_count = children.front().size(); + for (auto const& col : children) { + CUDF_EXPECTS(col.size() == row_count, "All columns must have the same number of rows."); + } + + auto const [null_mask, null_count] = cudf::detail::valid_if( + is_null.begin(), is_null.end(), thrust::logical_not{}, stream, mr); + + auto const structs = + cudf::column_view(cudf::data_type{cudf::type_id::STRUCT}, + row_count, + nullptr, + reinterpret_cast(null_mask.data()), 
+ null_count, + 0, + children); + return std::make_unique(structs, stream, mr); +} + +} // namespace detail + +std::tuple, std::unique_ptr, char> concat_json( + cudf::strings_column_view const& input, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::concat_json(input, stream, mr); +} + +std::unique_ptr make_structs(std::vector const& children, + cudf::column_view const& is_null, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::make_structs(children, is_null, stream, mr); +} + +} // namespace spark_rapids_jni diff --git a/src/main/cpp/src/from_json.hpp b/src/main/cpp/src/json_utils.hpp similarity index 65% rename from src/main/cpp/src/from_json.hpp rename to src/main/cpp/src/json_utils.hpp index 75fc3bc103..5671a7329a 100644 --- a/src/main/cpp/src/from_json.hpp +++ b/src/main/cpp/src/json_utils.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -31,4 +32,15 @@ std::unique_ptr from_json_to_raw_map( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +std::tuple, std::unique_ptr, char> concat_json( + cudf::strings_column_view const& input, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + +std::unique_ptr make_structs( + std::vector const& input, + cudf::column_view const& is_null, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + } // namespace spark_rapids_jni diff --git a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java index 3a7c4a6a53..1a41e58613 100644 --- a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java +++ b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java @@ -160,6 +160,65 
@@ public static ColumnVector extractRawMapFromJsonString(ColumnView input) { return new ColumnVector(extractRawMapFromJsonString(input.getNativeView())); } + /** + * A class to hold the result when concatenating JSON strings. + *

+ * Along with the concatenated data, the result also contains a vector that indicates + * whether each row in the input is null or empty, and the delimiter used for concatenation. + */ + public static class ConcatenatedJson implements AutoCloseable { + public final ColumnVector isNullOrEmpty; + public final DeviceMemoryBuffer data; + public final char delimiter; + + public ConcatenatedJson(ColumnVector isNullOrEmpty, DeviceMemoryBuffer data, char delimiter) { + this.isNullOrEmpty = isNullOrEmpty; + this.data = data; + this.delimiter = delimiter; + } + + @Override + public void close() { + isNullOrEmpty.close(); + data.close(); + } + } + + /** + * Concatenate JSON strings in the input column into a single JSON string. + *

+ * During concatenation, the function also generates a boolean vector that indicates whether + * each row in the input is null or empty. The delimiter used for concatenation is also returned. + * + * @param input The input strings column to concatenate + * @return A {@link ConcatenatedJson} object that contains the concatenated output + */ + public static ConcatenatedJson concatenateJsonStrings(ColumnView input) { + assert (input.getType().equals(DType.STRING)) : "Input must be of STRING type"; + long[] concatenated = concatenateJsonStrings(input.getNativeView()); + return new ConcatenatedJson(new ColumnVector(concatenated[0]), + DeviceMemoryBuffer.fromRmm(concatenated[1], concatenated[2], concatenated[3]), + (char) concatenated[4]); + } + + /** + * Create a structs column from the given children columns and a boolean column specifying + * the rows at which the output column should be null. + *

+ * Note that the children columns are expected to have null rows at the same positions indicated + * by the input isNull column. + * + * @param children The children columns of the output structs column + * @param isNull A boolean column specifying the rows at which the output column should be null + * @return A structs column created from the given children and the isNull column + */ + public static ColumnVector makeStructs(ColumnView[] children, ColumnView isNull) { + long[] handles = new long[children.length]; + for (int i = 0; i < children.length; i++) { + handles[i] = children[i].getNativeView(); + } + return new ColumnVector(makeStructs(handles, isNull.getNativeView())); + } private static native int getMaxJSONPathDepth(); @@ -178,4 +237,8 @@ private static native long[] getJsonObjectMultiplePaths(long input, private static native long extractRawMapFromJsonString(long input); + + private static native long[] concatenateJsonStrings(long input); + + private static native long makeStructs(long[] children, long isNull); } From 42dd8987acb183ffcc534a987ae29b202bc01c39 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:51:06 +0800 Subject: [PATCH 045/157] Update submodule cudf to 1436cac9de8b450a32e71d5b779503e9a29edaa6 (#2489) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 69b0f661ff..1436cac9de 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 69b0f661ff2fc4c12bb0fe696e556f6b3224b381 +Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 5d73ae95b4..1073147e4f 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 
+1 @@ -fe3362fa774195a3d434d6835416672e1d46555e +27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae From 6314455a5905d14bb37e94e8f1a669ce6c850ad9 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Oct 2024 03:00:55 +0000 Subject: [PATCH 046/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 24 ++++++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 319a53327a..1436cac9de 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 319a53327ac7c921a78979a1f23c5caf7171129d +Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 37820d8ad4..1073147e4f 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -312909127cf0fe96e178f0ffa754908f58d489a3 +27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index ed40c777a4..a93318b4a8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f", + "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c", + "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", "git_url" : "https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, @@ -109,6 +109,14 @@ "git_shallow" : false, 
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "patches" : + [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "fixed_in" : "", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537" + } + ], "version" : "0.6.0.dev" }, "nvcomp" : @@ -119,15 +127,15 @@ "git_url" : "https://github.com/NVIDIA/nvcomp.git", "proprietary_binary" : { - "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz", - "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz" + "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz", + "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz" }, "proprietary_binary_cuda_version_mapping" : { "11" : "11.x", "12" : "12.x" }, - "version" : "3.0.6" + "version" : "4.0.1" }, "nvtx3" : { @@ -141,9 +149,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e", + "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From a3132f5e6fdcaadc8216acc03851e17f38ffedfb Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Oct 2024 04:30:58 +0000 Subject: [PATCH 047/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 24 
++++++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 319a53327a..1436cac9de 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 319a53327ac7c921a78979a1f23c5caf7171129d +Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 37820d8ad4..1073147e4f 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -312909127cf0fe96e178f0ffa754908f58d489a3 +27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index ed40c777a4..a93318b4a8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f", + "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c", + "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", "git_url" : "https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, @@ -109,6 +109,14 @@ "git_shallow" : false, "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "patches" : + [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "fixed_in" : "", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537" + } + ], "version" : "0.6.0.dev" }, "nvcomp" : @@ -119,15 +127,15 @@ "git_url" : "https://github.com/NVIDIA/nvcomp.git", "proprietary_binary" : { - "aarch64-linux" : 
"https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz", - "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz" + "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz", + "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz" }, "proprietary_binary_cuda_version_mapping" : { "11" : "11.x", "12" : "12.x" }, - "version" : "3.0.6" + "version" : "4.0.1" }, "nvtx3" : { @@ -141,9 +149,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e", + "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 2c85286887727c82a5de8ab2f56ebe49e7767077 Mon Sep 17 00:00:00 2001 From: spark-rapids automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:28:34 +0000 Subject: [PATCH 048/157] Auto-merge use branch-24.12 versions Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 24 ++++++++++++++++-------- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7b0adfa253..1436cac9de 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7b0adfa2533e4792464230ee67916a04ce06caf6 +Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 37820d8ad4..1073147e4f 100644 --- 
a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -312909127cf0fe96e178f0ffa754908f58d489a3 +27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index ed40c777a4..a93318b4a8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,9 +44,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f", + "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed", "git_url" : "https://github.com/rapidsai/kvikio.git", - "version" : "24.10" + "version" : "24.12" }, "bs_thread_pool" : { @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c", + "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", "git_url" : "https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, @@ -109,6 +109,14 @@ "git_shallow" : false, "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "patches" : + [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "fixed_in" : "", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537" + } + ], "version" : "0.6.0.dev" }, "nvcomp" : @@ -119,15 +127,15 @@ "git_url" : "https://github.com/NVIDIA/nvcomp.git", "proprietary_binary" : { - "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz", - "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz" + "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz", + "x86_64-linux" : 
"https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz" }, "proprietary_binary_cuda_version_mapping" : { "11" : "11.x", "12" : "12.x" }, - "version" : "3.0.6" + "version" : "4.0.1" }, "nvtx3" : { @@ -141,9 +149,9 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e", + "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b", "git_url" : "https://github.com/rapidsai/rmm.git", - "version" : "24.10" + "version" : "24.12" }, "spdlog" : { From 024b9c6f38b1fd0a221bfedf865ee1dbbd2e81bf Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:09:29 -0700 Subject: [PATCH 049/157] Avoid parsing field name twice when matching named instruction in `get_json_object` kernel (#2471) * Temporarily add back benchmark code Signed-off-by: Nghia Truong * Optimize stack data Signed-off-by: Nghia Truong * Revert "Optimize stack data" This reverts commit 59ac5bf0d494e17fbf9b2a7f4806abe057a65c45. * Reorganize code Signed-off-by: Nghia Truong * Perform name matching when parsing name field Signed-off-by: Nghia Truong * Optimize write Signed-off-by: Nghia Truong * Cleanup Signed-off-by: Nghia Truong * Simplify `char_range_reader` Signed-off-by: Nghia Truong * Try to reduce stack data size Signed-off-by: Nghia Truong * Revert "Try to reduce stack data size" This reverts commit a8d563c033f7c4eb20c3055f2ed99f559e284a2b. 
* Cleanup Signed-off-by: Nghia Truong * Revert benchmark Signed-off-by: Nghia Truong --------- Signed-off-by: Nghia Truong --- src/main/cpp/src/get_json_object.cu | 24 +-- src/main/cpp/src/json_parser.cuh | 261 ++++++++++++---------------- 2 files changed, 120 insertions(+), 165 deletions(-) diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu index f836186192..622a56bc55 100644 --- a/src/main/cpp/src/get_json_object.cu +++ b/src/main/cpp/src/get_json_object.cu @@ -304,17 +304,6 @@ __device__ inline thrust::tuple path_match_index( } } -__device__ inline thrust::tuple path_match_named( - cudf::device_span path) -{ - auto match = path_match_element(path, path_instruction_type::NAMED); - if (match) { - return thrust::make_tuple(true, path.data()[0].name); - } else { - return thrust::make_tuple(false, cudf::string_view()); - } -} - __device__ inline thrust::tuple path_match_index_wildcard( cudf::device_span path) { @@ -464,7 +453,7 @@ __device__ thrust::pair evaluate_path( // case (START_OBJECT, Named :: xs) // case path 4 else if (json_token::START_OBJECT == ctx.token && - thrust::get<0>(path_match_named(ctx.path))) { + ctx.path.front().type == path_instruction_type::NAMED) { if (!ctx.is_first_enter) { // 2st enter // skip the following children after the expect @@ -492,15 +481,16 @@ __device__ thrust::pair evaluate_path( ctx.is_first_enter = false; // match first mached children with expected name bool found_expected_child = false; - while (json_token::END_OBJECT != p.next_token()) { + auto const to_match_name = ctx.path.front().name; + while (true) { + auto const is_name_matched = p.parse_next_token_with_matching(to_match_name); + if (json_token::END_OBJECT == p.get_current_token()) { break; } + // JSON validation check if (json_token::ERROR == p.get_current_token()) { return {false, 0}; } - // need to try more children - auto match_named = path_match_named(ctx.path); - auto named = thrust::get<1>(match_named); // current token is 
FIELD_NAME - if (p.match_current_field_name(named)) { + if (is_name_matched) { // skip FIELD_NAME token p.next_token(); // JSON validation check diff --git a/src/main/cpp/src/json_parser.cuh b/src/main/cpp/src/json_parser.cuh index 4e712937ed..446caf6357 100644 --- a/src/main/cpp/src/json_parser.cuh +++ b/src/main/cpp/src/json_parser.cuh @@ -134,13 +134,8 @@ class char_range { __device__ inline cudf::size_type size() const { return _len; } __device__ inline char const* data() const { return _data; } - __device__ inline char const* start() const { return _data; } - __device__ inline char const* end() const { return _data + _len; } - - __device__ inline bool eof(cudf::size_type pos) const { return pos >= _len; } __device__ inline bool is_null() const { return _data == nullptr; } - __device__ inline bool is_empty() const { return _len == 0; } - + __device__ inline bool is_empty() const { return _len <= 0; } __device__ inline char operator[](cudf::size_type pos) const { return _data[pos]; } __device__ inline cudf::string_view slice_sv(cudf::size_type pos, cudf::size_type len) const @@ -153,35 +148,29 @@ class char_range { return char_range(_data + pos, len); } - private: + protected: char const* _data; cudf::size_type _len; }; /** - * A char_range that keeps track of where in the data it currently is. + * A char range that moves the begin pointer of the current range forward while reading. + * + * This support continuous reading of characters without the need of an additional variable + * to keep track of the current reading position. 
*/ -class char_range_reader { +class char_range_reader : public char_range { public: - __device__ inline explicit char_range_reader(char_range range) : _range(range), _pos(0) {} - - __device__ inline char_range_reader(char_range range, cudf::size_type start) - : _range(range), _pos(start) + __device__ inline explicit char_range_reader(char_range range) : char_range(std::move(range)) {} + __device__ inline void next() { + _data++; + _len--; } - __device__ inline bool eof() const { return _range.eof(_pos); } - __device__ inline bool is_null() const { return _range.is_null(); } - - __device__ inline void next() { _pos++; } - - __device__ inline char current_char() const { return _range[_pos]; } - - __device__ inline cudf::size_type pos() const { return _pos; } - - private: - char_range _range; - cudf::size_type _pos; + // Warning: this does not check for out-of-bound access. + // The caller must be responsible to check for empty range before calling this. + __device__ inline char current_char() const { return _data[0]; } }; /** @@ -298,7 +287,7 @@ class json_parser { */ static __device__ inline bool try_skip(char_range_reader& reader, char expected) { - if (!reader.eof() && reader.current_char() == expected) { + if (!reader.is_empty() && reader.current_char() == expected) { reader.next(); return true; } @@ -412,12 +401,10 @@ class json_parser { */ __device__ inline void parse_string_and_set_current() { - // TODO eventually chars should be a reader so we can just pass it in... - char_range_reader reader(chars, curr_pos); - auto [success, end_char_pos] = try_parse_string(reader); + [[maybe_unused]] auto const [success, matched, end] = + try_parse_string(char_range_reader{chars.slice(curr_pos, chars.size() - curr_pos)}); if (success) { - // TODO remove end_char_pos, and just get it from the reader... 
- curr_pos = end_char_pos; + curr_pos = static_cast(thrust::distance(chars.data(), end)); current_token = json_token::VALUE_STRING; } else { set_current_error(); @@ -499,7 +486,7 @@ class json_parser { char* copy_destination, escape_style w_style) { - if (str.eof()) { return 0; } + if (str.is_empty()) { return 0; } char const quote_char = str.current_char(); int output_size_bytes = 0; @@ -514,7 +501,7 @@ class json_parser { str.next(); // scan string content - while (!str.eof()) { + while (!str.is_empty()) { char const c = str.current_char(); int const v = static_cast(c); if (c == quote_char) { @@ -546,8 +533,10 @@ class json_parser { } else if ('\\' == c) { // path 3: escape path str.next(); - char_range_reader to_match(char_range::null()); - if (!try_skip_escape_part(str, to_match, copy_destination, w_style, output_size_bytes)) { + char_range_reader to_match(char_range::null()); // unused + bool matched_field_name{false}; // unused + if (!try_skip_escape_part( + str, to_match, copy_destination, w_style, output_size_bytes, matched_field_name)) { return output_size_bytes; } } else { @@ -610,85 +599,58 @@ class json_parser { * * @param str string to parse * @param to_match expected match str - * @param w_style the escape style for writing. - * @return a pair of success and length, where success is true if the string - * is valid and length is the number of bytes needed to encode the string - * in the given style. 
+ * @return a tuple of values indicating if the parse process was successful, if field name was + * matched, and a pointer to the past-end position of the parsed data */ - static __device__ inline std::pair try_parse_string( - char_range_reader& str, - char_range_reader to_match = char_range_reader(char_range::null()), - escape_style w_style = escape_style::UNESCAPED) + static __device__ inline thrust::tuple try_parse_string( + char_range_reader str, char_range_reader to_match = char_range_reader(char_range::null())) { - if (str.eof()) { return std::make_pair(false, 0); } - char const quote_char = str.current_char(); - int output_size_bytes = 0; - - // write the first " if write style is escaped - if (escape_style::ESCAPED == w_style) { output_size_bytes++; } + if (str.is_empty()) { return thrust::make_tuple(false, false, nullptr); } + char const quote_char = str.current_char(); + bool matched_field_name = !to_match.is_null(); // skip left quote char // We don't need to actually verify what it is, because we just read it. 
str.next(); // scan string content - while (!str.eof()) { + while (!str.is_empty()) { char c = str.current_char(); int v = static_cast(c); - if (c == quote_char) { - // path 1: match closing quote char + if (c == quote_char) { // path 1: match closing quote char str.next(); - - // match check, the last char in match_str is quote_char - if (!to_match.is_null() && !to_match.eof()) { return std::make_pair(false, 0); } - - // write the end " if write style is escaped - if (escape_style::ESCAPED == w_style) { output_size_bytes++; } - - return std::make_pair(true, str.pos()); - } else if (v >= 0 && v < 32) { - // path 2: unescaped control char - - // copy if enabled, escape mode, write more chars - if (escape_style::ESCAPED == w_style) { - int escape_chars = escape_char(str.current_char(), nullptr); - output_size_bytes += (escape_chars - 1); - } - - // check match if enabled - if (!try_match_char(to_match, str.current_char())) { return std::make_pair(false, 0); } - + matched_field_name = matched_field_name && (to_match.is_null() || to_match.is_empty()); + return thrust::make_tuple(true, matched_field_name, str.data()); + } else if (v >= 0 && v < 32) { // path 2: unescaped control char + matched_field_name = matched_field_name && try_match_char(to_match, c); str.next(); - output_size_bytes++; continue; - } else if ('\\' == c) { - // path 3: escape path + } else if ('\\' == c) { // path 3: escape path str.next(); - char* copy_dest_nullptr = nullptr; - if (!try_skip_escape_part(str, to_match, copy_dest_nullptr, w_style, output_size_bytes)) { - return std::make_pair(false, 0); - } - } else { - // path 4: safe code point - - // handle single unescaped " char; happens when string is quoted by char ' - // e.g.: 'A"' string, escape to "A\\"" (5 chars: " A \ " ") - if ('\"' == c && escape_style::ESCAPED == w_style) { output_size_bytes++; } - if (!try_skip_safe_code_point(str, c)) { return std::make_pair(false, 0); } - // check match if enabled - if (!try_match_char(to_match, c)) 
{ return std::make_pair(false, 0); } - output_size_bytes++; + char* copy_dest_nullptr = nullptr; // unused + int output_size_bytes = 0; // unused + if (!try_skip_escape_part(str, + to_match, + copy_dest_nullptr, + escape_style::UNESCAPED, + output_size_bytes, + matched_field_name)) { + return thrust::make_tuple(false, false, nullptr); + } + } else { // path 4: safe code point + if (!try_skip_safe_code_point(str, c)) { return thrust::make_tuple(false, false, nullptr); } + matched_field_name = matched_field_name && try_match_char(to_match, c); } } - return std::make_pair(false, 0); + return thrust::make_tuple(false, false, nullptr); } static __device__ inline bool try_match_char(char_range_reader& reader, char c) { if (!reader.is_null()) { - if (!reader.eof() && reader.current_char() == c) { + if (!reader.is_empty() && reader.current_char() == c) { reader.next(); return true; } else { @@ -708,11 +670,12 @@ class json_parser { char_range_reader& to_match, char*& copy_dest, escape_style w_style, - int& output_size_bytes) + int& output_size_bytes, + bool& matched_field_name) { // already skipped the first '\' // try skip second part - if (!str.eof()) { + if (!str.is_empty()) { char const c = str.current_char(); switch (c) { // path 1: \", \', \\, \/, \b, \f, \n, \r, \t @@ -725,17 +688,17 @@ class json_parser { } output_size_bytes++; } - if (!try_match_char(to_match, c)) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, c); return true; case '\'': // for both unescaped/escaped writes a single char ' if (nullptr != copy_dest) { *copy_dest++ = c; } - if (!try_match_char(to_match, c)) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, c); return true; case '\\': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = c; } @@ -746,16 +709,16 @@ class json_parser { } output_size_bytes++; } - if 
(!try_match_char(to_match, c)) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, c); return true; case '/': // for both unescaped/escaped writes a single char / if (nullptr != copy_dest) { *copy_dest++ = c; } - if (!try_match_char(to_match, c)) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, c); return true; case 'b': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\b'; } @@ -766,9 +729,9 @@ class json_parser { } output_size_bytes++; } - if (!try_match_char(to_match, '\b')) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, '\b'); return true; case 'f': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\f'; } @@ -779,9 +742,9 @@ class json_parser { } output_size_bytes++; } - if (!try_match_char(to_match, '\f')) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, '\f'); return true; case 'n': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\n'; } @@ -792,9 +755,9 @@ class json_parser { } output_size_bytes++; } - if (!try_match_char(to_match, '\n')) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, '\n'); return true; case 'r': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\r'; } @@ -805,9 +768,9 @@ class json_parser { } output_size_bytes++; } - if (!try_match_char(to_match, '\r')) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, '\r'); return true; case 't': if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\t'; } @@ -818,9 +781,9 @@ class json_parser { } output_size_bytes++; } - if 
(!try_match_char(to_match, '\t')) { return false; } output_size_bytes++; str.next(); + matched_field_name = matched_field_name && try_match_char(to_match, '\t'); return true; // path 1 done: \", \', \\, \/, \b, \f, \n, \r, \t case 'u': @@ -829,7 +792,7 @@ class json_parser { // for both unescaped/escaped writes corresponding utf8 bytes, no need // to pass in write style - return try_skip_unicode(str, to_match, copy_dest, output_size_bytes); + return try_skip_unicode(str, to_match, copy_dest, output_size_bytes, matched_field_name); default: // path 3: invalid return false; @@ -948,13 +911,14 @@ class json_parser { static __device__ bool try_skip_unicode(char_range_reader& str, char_range_reader& to_match, char*& copy_dest, - int& output_size_bytes) + int& output_size_bytes, + bool& matched_field_name) { // already parsed \u // now we expect 4 hex chars. cudf::char_utf8 code_point = 0; for (size_t i = 0; i < 4; i++) { - if (str.eof()) { return false; } + if (str.is_empty()) { return false; } char const c = str.current_char(); str.next(); if (!is_hex_digit(c)) { return false; } @@ -976,9 +940,12 @@ class json_parser { } } - if (!to_match.is_null()) { + if (matched_field_name && !to_match.is_null()) { for (cudf::size_type i = 0; i < bytes; i++) { - if (to_match.eof() || to_match.current_char() != buff[i]) { return false; } + if (to_match.is_empty() || to_match.current_char() != buff[i]) { + matched_field_name = false; + break; + } to_match.next(); } } @@ -1210,16 +1177,17 @@ class json_parser { /** * parse the key string in key:value pair */ - __device__ inline void parse_field_name_and_set_current() + __device__ inline void parse_field_name_and_set_current( + bool& matched_field_name, char_range to_match_field_name = char_range::null()) { - // TODO eventually chars should be a reader so we can just pass it in... 
- char_range_reader reader(chars, curr_pos); - current_token_start_pos = curr_pos; - auto [success, end_char_pos] = try_parse_string(reader); + current_token_start_pos = curr_pos; + auto const [success, matched, end] = + try_parse_string(char_range_reader{chars.slice(curr_pos, chars.size() - curr_pos)}, + char_range_reader{std::move(to_match_field_name)}); if (success) { - // TODO remove end_char_pos, and just get it from the reader... - curr_pos = end_char_pos; - current_token = json_token::FIELD_NAME; + matched_field_name = matched; + curr_pos = static_cast(thrust::distance(chars.data(), end)); + current_token = json_token::FIELD_NAME; } else { set_current_error(); } @@ -1228,11 +1196,12 @@ class json_parser { /** * continute parsing the next token and update current token * Note: only parse one token at a time - * @param[out] has_comma_before_token has comma before next token - * @param[out] has_colon_before_token has colon before next token */ - __device__ inline void parse_next_token_and_set_current(bool& has_comma_before_token, - bool& has_colon_before_token) + __device__ inline void parse_next_token_and_set_current( + bool& has_comma_before_token, + bool& has_colon_before_token, + bool& matched_field_name, + char_range to_match_field_name = char_range::null()) { skip_whitespaces(); if (!eof()) { @@ -1264,7 +1233,7 @@ class json_parser { current_token = json_token::END_OBJECT; } else { // parse key in key:value pair - parse_field_name_and_set_current(); + parse_field_name_and_set_current(matched_field_name, to_match_field_name); } } else if (current_token == json_token::FIELD_NAME) { if (c == ':') { @@ -1289,7 +1258,7 @@ class json_parser { // parse next key:value pair curr_pos++; skip_whitespaces(); - parse_field_name_and_set_current(); + parse_field_name_and_set_current(matched_field_name, to_match_field_name); } else { set_current_error(); } @@ -1351,10 +1320,29 @@ class json_parser { // parse next token bool has_comma_before_token; // no-initialization 
because of do not care here bool has_colon_before_token; // no-initialization because of do not care here - parse_next_token_and_set_current(has_comma_before_token, has_colon_before_token); + bool matched_field_name; // no-initialization because of do not care here + parse_next_token_and_set_current( + has_comma_before_token, has_colon_before_token, matched_field_name); return current_token; } + /** + * Continute parsing the next token. If the token is a field name then check if it is + * matched with the given name. + */ + __device__ bool parse_next_token_with_matching(cudf::string_view to_match_field_name) + { + // parse next token + bool has_comma_before_token; // no-initialization because of do not care here + bool has_colon_before_token; // no-initialization because of do not care here + bool matched_field_name; + parse_next_token_and_set_current(has_comma_before_token, + has_colon_before_token, + matched_field_name, + char_range{to_match_field_name}); + return matched_field_name; + } + /** * get current token */ @@ -1573,31 +1561,6 @@ class json_parser { return 0; } - /** - * match field name string when current token is FIELD_NAME, - * return true if current token is FIELD_NAME and match successfully. - * return false otherwise, - */ - __device__ bool match_current_field_name(cudf::string_view name) const - { - return match_current_field_name(char_range(name)); - } - - /** - * match current field name - */ - __device__ bool match_current_field_name(char_range name) const - { - if (json_token::FIELD_NAME == current_token) { - char_range_reader reader(current_range()); - char_range_reader to_match(name); - auto [b, end_pos] = try_parse_string(reader, to_match, escape_style::UNESCAPED); - return b; - } else { - return false; - } - } - /** * copy current structure to destination. 
* return false if meets JSON format error, @@ -1648,7 +1611,9 @@ class json_parser { bool has_colon_before_token = false; // parse and get has_comma_before_token, has_colon_before_token - parse_next_token_and_set_current(has_comma_before_token, has_colon_before_token); + bool matched_field_name; // unused + parse_next_token_and_set_current( + has_comma_before_token, has_colon_before_token, matched_field_name); // check the JSON format if (current_token == json_token::ERROR) { return thrust::make_pair(false, 0); } From 9356867d10dcacdd88005c79e10c44040e239afd Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 11 Oct 2024 16:26:54 -0500 Subject: [PATCH 050/157] Nvcomp revert followup (#2497) Signed-off-by: Robert (Bobby) Evans --- ci/submodule-sync.sh | 2 +- patches/revert_nvcomp4.patch | 82 +++++++++++++++++++++--------- pom.xml | 8 ++- thirdparty/cudf-pins/versions.json | 8 +-- 4 files changed, 68 insertions(+), 32 deletions(-) diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index 29b0cf5dad..bd9d8d87bb 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -71,7 +71,7 @@ echo "Test against ${cudf_sha}..." MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" set +e # Don't do a full build. Just try to update/build CUDF with no patches on top of it. 
-${MVN} validate ${MVN_MIRROR} \ +${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -Dlibcudf.dependency.mode=latest \ diff --git a/patches/revert_nvcomp4.patch b/patches/revert_nvcomp4.patch index 88b58b14dc..914c033088 100644 --- a/patches/revert_nvcomp4.patch +++ b/patches/revert_nvcomp4.patch @@ -25,7 +25,7 @@ index 5e9f7f8a0c..0e4745bda2 100755 ${package_dir}/dist/* diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml -index 5a05dfd053..e7363645d6 100644 +index bd5e6c3d56..74ca3fda1a 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -58,7 +58,7 @@ dependencies: @@ -38,11 +38,11 @@ index 5a05dfd053..e7363645d6 100644 - openpyxl - packaging diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml -index 8490296233..3559a1a341 100644 +index 565a3ebfa3..22619acf4a 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -56,7 +56,7 @@ dependencies: - - numba>=0.57 + - numba-cuda>=0.0.13 - numpy>=1.23,<3.0a0 - numpydoc -- nvcomp==4.0.1 @@ -50,6 +50,15 @@ index 8490296233..3559a1a341 100644 - nvtx>=0.2.1 - openpyxl - packaging +@@ -67,7 +67,7 @@ dependencies: + - pre-commit + - pyarrow>=14.0.0,<18.0.0a0 + - pydata-sphinx-theme!=0.14.2 +-- pynvjitlink>=0.0.0a0 ++- pynvjitlink + - pytest-benchmark + - pytest-cases>=3.8.2 + - pytest-cov diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index dc75eb4b25..67d501d746 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml @@ -697,7 +706,7 @@ index 60a64fb0ee..40cfbe763b 100644 /** diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu -index c588fedb85..bab70c126b 100644 +index 27312a4da8..779d40281b 
100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -865,18 +865,8 @@ std::vector compute_page_splits_by_row(device_span=2.5.0,<2.6.0a0 +@@ -381,21 +381,21 @@ dependencies: + - output_types: conda + packages: # Align nvcomp version with rapids-cmake - - nvcomp==4.0.1 + - nvcomp==3.0.6 - - spdlog>=1.14.1,<1.15 + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + packages: +- - nvidia-nvcomp-cu12==4.0.1 ++ - nvidia-nvcomp-cu12==3.0.6 + - matrix: + cuda: "11.*" + packages: +- - nvidia-nvcomp-cu11==4.0.1 ++ - nvidia-nvcomp-cu11==3.0.6 + - matrix: + packages: +- - nvidia-nvcomp==4.0.1 ++ - nvidia-nvcomp==3.0.6 rapids_build_skbuild: common: + - output_types: [conda, requirements, pyproject] +@@ -665,7 +665,7 @@ dependencies: + matrices: + - matrix: {cuda: "12.*"} + packages: +- - &pynvjitlink_unsuffixed pynvjitlink>=0.0.0a0 ++ - &pynvjitlink_unsuffixed pynvjitlink + - matrix: {cuda: "11.*"} + packages: + - &cubinlinker_unsuffixed cubinlinker +@@ -676,7 +676,7 @@ dependencies: + cuda: "12.*" + cuda_suffixed: "true" + packages: +- - pynvjitlink-cu12>=0.0.0a0 ++ - pynvjitlink-cu12 + - matrix: + cuda: "12.*" + cuda_suffixed: "false" diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md index 97b961b455..adcdaa51e7 100644 --- a/docs/cudf/source/user_guide/io/io.md @@ -843,7 +887,7 @@ index 97b961b455..adcdaa51e7 100644 - ``` diff --git a/java/pom.xml b/java/pom.xml -index e4f1cdf64e..9694e741f1 100644 +index 450cfbdbc8..55cb055398 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,6 +1,6 @@ @@ -893,15 +937,3 @@ index 32045f3c50..c18a90140b 100644 COMMENT "Copying nvcomp libraries to ${PROJECT_BINARY_DIR}" ) endif() -diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt -index 0a8f5c4807..96eb6c3bb3 100644 ---- a/python/libcudf/CMakeLists.txt -+++ b/python/libcudf/CMakeLists.txt -@@ -48,5 +48,6 @@ add_subdirectory(../../cpp 
cudf-cpp) - # Ensure other libraries needed by libcudf.so get installed alongside it. - include(cmake/Modules/WheelHelpers.cmake) - install_aliased_imported_targets( -- TARGETS cudf nvcomp::nvcomp DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -+ TARGETS cudf nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION -+ ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} - ) diff --git a/pom.xml b/pom.xml index b9c6877688..c3156fdb57 100644 --- a/pom.xml +++ b/pom.xml @@ -429,7 +429,7 @@ build-libcudf validate - + @@ -466,7 +466,8 @@ + executable="cmake" + unless:true="${submodule.patch.skip}"> @@ -483,6 +484,7 @@ build-libcudfjni validate + ${submodule.patch.skip} build-sparkrapidsjni validate + ${submodule.patch.skip} build-info generate-resources + ${submodule.patch.skip} Date: Sat, 12 Oct 2024 10:52:51 +0800 Subject: [PATCH 051/157] Update submodule cudf to be1dd3267ed3cf7045c573ccc622f34fd159675f (#2500) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1436cac9de..be1dd3267e 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6 +Subproject commit be1dd3267ed3cf7045c573ccc622f34fd159675f diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 72c0ecec7f..df743054ff 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed", + "git_tag" : "22668fa1d9ea5918f463c52bcdcb5ef181e5d1d0", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b", + "git_tag" : 
"1b70ffdd5ab460ac481f1575c42e8c1fccfda792", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, @@ -162,4 +162,4 @@ "version" : "1.14.1" } } -} +} \ No newline at end of file From 282e0d0a7cce8d1c24ad6e44a5d22d6bb1733599 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 12 Oct 2024 16:02:08 +0800 Subject: [PATCH 052/157] Update submodule cudf to 4dbb8a354a9d4f0b4d82a5bf9747409c6304358f (#2501) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index be1dd3267e..4dbb8a354a 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit be1dd3267ed3cf7045c573ccc622f34fd159675f +Subproject commit 4dbb8a354a9d4f0b4d82a5bf9747409c6304358f From 2c3b60cc8c3411350bfa2639d4f752fa9c4e4f96 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 14 Oct 2024 17:29:29 -0500 Subject: [PATCH 053/157] Make it so applying and removing patches are repeatable without errors (#2502) * Make it so applying and removing patches are repeatable without errors Signed-off-by: Robert (Bobby) Evans * Adjust config for skipping a patch * More fixes Signed-off-by: Robert (Bobby) Evans --------- Signed-off-by: Robert (Bobby) Evans --- build/apply-patches | 49 +++++++++++++++++++++++++++----- build/unapply-patches | 66 ++++++++++++++++++++++++++++++++++--------- ci/submodule-sync.sh | 5 ++-- pom.xml | 12 ++++---- 4 files changed, 103 insertions(+), 29 deletions(-) diff --git a/build/apply-patches b/build/apply-patches index 991613e6dc..31c2adcfdd 100755 --- a/build/apply-patches +++ b/build/apply-patches @@ -16,8 +16,6 @@ # limitations under the License. 
# -# Run a command in a Docker container with devtoolset - set -e BASE_DIR=$( git rev-parse --show-toplevel ) @@ -26,14 +24,51 @@ PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")} CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")} +# Apply pattches to CUDF is problematic in a number of ways. But ultimately it comes down to +# making sure that a user can do development work in spark-rapids-jni without the patches +# getting in the way +# The operations I really want to support no matter what state CUDF is in are +# 1) Build the repo from scratch +# 2) Rebuild the repo without having to clean and start over +# 3) upmerge to a new version of the plugin including updating the cudf submodule +# +# Building from scratch is simple. We want clean to unapply any patches and +# build to apply them. But if we want to rebuild without a clean we need to know what +# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this +# is to save some state files about what happened. But a user could mess with CUDF directly +# so we want to have ways to double check that they are indeed correct. + +FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch" + pushd "$CUDF_DIR" -if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then - echo "Error: CUDF repository has uncommitted changes. No patches will be applied..." - exit 1 + +PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty) + +if [ -z "$PATCH_FILES" ] ; then + echo "No patches to apply" + exit 0 +fi + +CHANGED_FILES=$(git status --porcelain --untracked-files=no) + +if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then + if git apply -R --check "$FULLY_PATCHED_FILE" ; then + echo "Patches appear to have been applied already" + exit 0 + fi +fi + +if [ -n "$CHANGED_FILES" ] ; then + echo "Error: CUDF repository has uncommitted changes. No patches will be applied. 
Please clean the repository so we can try and add the needed patches" + echo "$CHANGED_FILE" + exit 1 fi find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do - echo "patching with: $file" - patch --no-backup-if-mismatch -f -t --reject-file=- -p1 -i "$file" + echo "patching with: $file" + git apply -v "$file" done + +git diff > "$FULLY_PATCHED_FILE" + popd diff --git a/build/unapply-patches b/build/unapply-patches index 186a781ade..a31708e25f 100755 --- a/build/unapply-patches +++ b/build/unapply-patches @@ -16,29 +16,67 @@ # limitations under the License. # -# Run a command in a Docker container with devtoolset - set -e -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +BASE_DIR=$( git rev-parse --show-toplevel ) + +PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")} -PATCH_DIR=${PATCH_DIR:-$(realpath "$SCRIPT_DIR/../patches/")} +CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")} -CUDF_DIR=${CUDF_DIR:-$(realpath "$SCRIPT_DIR/../thirdparty/cudf/")} +# Apply pattches to CUDF is problematic in a number of ways. But ultimately it comes down to +# making sure that a user can do development work in spark-rapids-jni without the patches +# getting in the way +# The operations I really want to support no matter what state CUDF is in are +# 1) Build the repo from scratch +# 2) Rebuild the repo without having to clean and start over +# 3) upmerge to a new version of the plugin including updating the cudf submodule +# +# Building from scratch is simple. We want clean to unapply any patches and +# build to apply them. But if we want to rebuild without a clean we need to know what +# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this +# is to save some state files about what happened. But a user could mess with CUDF directly +# so we want to have ways to double check that they are indeed correct. 
+FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch" pushd "$CUDF_DIR" -if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then - #only try to remove patches if it looks like something was changed - find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV -r | while IFS= read -r -d '' file; do - echo "patching with: $file" - patch -R --no-backup-if-mismatch --reject-file=- -f -t -p1 -i "$file" - done + +PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty) + +if [ -z "$PATCH_FILES" ] ; then + echo "No patches to remove" + exit 0 fi -# Check for modifications -if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then - echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected" +CHANGED_FILES=$(git status --porcelain --untracked-files=no) + +if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then + if git apply --check -R "$FULLY_PATCHED_FILE"; then + echo "Patches appear to have been applied, so going to remove them" + git apply -R -v "$FULLY_PATCHED_FILE" + rm -f "$FULLY_PATCHED_FILE" + + # Check for modifications, again + if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then + echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected" + git status --porcelain --untracked-files=no + exit 1 + fi + + exit 0 + else + echo "Files are changed, but in a way where the full path file does not apply to remove them $FULL_PATCHED_FILE" exit 1 + fi fi + +if [ -n "$CHANGED_FILES" ] ; then + echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again." + echo "$CHANGED_FILE" + exit 1 +else + echo "No changes in CUDF repository to remove" +fi + popd diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index bd9d8d87bb..25cc6b9901 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -71,12 +71,13 @@ echo "Test against ${cudf_sha}..." 
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" set +e # Don't do a full build. Just try to update/build CUDF with no patches on top of it. +# calling the antrun directly skips applying patches and also only builds +# libcudf ${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \ -DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \ -Dlibcudf.build.configure=true \ -Dlibcudf.dependency.mode=latest \ - -Dsubmodule.patch.skip \ - -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \ + -DUSE_GDS=ON \ -DBUILD_TESTS=ON \ -DUSE_SANITIZER=ON validate_status=$? diff --git a/pom.xml b/pom.xml index c3156fdb57..a50feefb22 100644 --- a/pom.xml +++ b/pom.xml @@ -110,6 +110,10 @@ UTF-8 1.7.30 false + false 3.0.0 0.2.2 @@ -429,7 +433,7 @@ build-libcudf validate - + @@ -466,8 +470,7 @@ + executable="cmake"> @@ -484,7 +487,6 @@ build-libcudfjni validate - ${submodule.patch.skip} build-sparkrapidsjni validate - ${submodule.patch.skip} build-info generate-resources - ${submodule.patch.skip} Date: Tue, 15 Oct 2024 11:27:36 -0500 Subject: [PATCH 054/157] Update to latest cudf 24.12 and add cudftestutil_impl dependency to tests (#2505) Signed-off-by: Jason Lowe --- src/main/cpp/tests/CMakeLists.txt | 3 ++- thirdparty/cudf | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/cpp/tests/CMakeLists.txt b/src/main/cpp/tests/CMakeLists.txt index 244d18c903..c774d30618 100644 --- a/src/main/cpp/tests/CMakeLists.txt +++ b/src/main/cpp/tests/CMakeLists.txt @@ -31,7 +31,8 @@ function(ConfigureTest CMAKE_TEST_NAME) INSTALL_RPATH "\$ORIGIN/../../../lib" ) target_link_libraries(${CMAKE_TEST_NAME} GTest::gtest_main GTest::gmock_main cudf::cudf - cudf::cudftestutil spark_rapids_jni) + cudf::cudftestutil cudf::cudftestutil_impl + spark_rapids_jni) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) install( TARGETS ${CMAKE_TEST_NAME} diff --git a/thirdparty/cudf b/thirdparty/cudf index 4dbb8a354a..319ec3b803 160000 --- a/thirdparty/cudf +++ 
b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 4dbb8a354a9d4f0b4d82a5bf9747409c6304358f +Subproject commit 319ec3b8031e4deb7dfc3f4c4a07a10ef88c131f From 41945c62f023a5cf232463d8eeeb33d6951781a7 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 16 Oct 2024 02:27:06 +0800 Subject: [PATCH 055/157] Update submodule cudf to 7bcfc87935b7a202002d54e17e140789b02f16e9 (#2507) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 319ec3b803..7bcfc87935 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 319ec3b8031e4deb7dfc3f4c4a07a10ef88c131f +Subproject commit 7bcfc87935b7a202002d54e17e140789b02f16e9 From e118e6eca1f6f67d4699a7eae6cb7d5481d87a20 Mon Sep 17 00:00:00 2001 From: Peixin Date: Wed, 16 Oct 2024 08:50:30 +0800 Subject: [PATCH 056/157] Make submodule-sync always try update cudf-pins (#2504) Signed-off-by: Peixin Li --- ci/submodule-sync.sh | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index 25cc6b9901..ea7c06b7ec 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -57,17 +57,19 @@ if [ -n "$CUDF_TAG" ]; then else git submodule update --remote --merge fi + +cudf_pins_only=false cudf_sha=$(git -C thirdparty/cudf rev-parse HEAD) if [[ "${cudf_sha}" == "${cudf_prev_sha}" ]]; then - echo "Submodule is up to date." - exit 0 + echo "cuDF submodule is up to date. Try update cudf-pins..." + cudf_pins_only=true +else + echo "Try update cudf submodule to ${cudf_sha}..." + git add . + git commit -s -m "Update submodule cudf to ${cudf_sha}" fi -echo "Try update cudf submodule to ${cudf_sha}..." -git add . - -echo "Test against ${cudf_sha}..." - +echo "Build libcudf only to update pinned versions..." 
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B" set +e # Don't do a full build. Just try to update/build CUDF with no patches on top of it. @@ -101,9 +103,17 @@ sed -i -e 's/4\.0\.1\.0/3.0.6/' \ # the updated versions.json generated by the build echo "Update cudf submodule to ${cudf_sha} with updated pinned versions" git add . -git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}" +if ! git diff-index --quiet HEAD; then + # We perform a squash merge for submodule-sync commits + git commit -s -m "Update pinned versions for cudf ${cudf_sha}" +elif ${cudf_pins_only}; then + echo "No changes to commit. Exit early..." + exit 0 +fi + sha=$(git rev-parse HEAD) +echo "Test against ${cudf_sha}..." set +e # now build and test everything with the patches in place ${MVN} clean verify ${MVN_MIRROR} \ From 33a92f75a550b715d7d4cd7ede799264fc2c4dbe Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 16 Oct 2024 16:48:57 +0800 Subject: [PATCH 057/157] Update submodule cudf to 3420c71cb72f63db8d63164446cca042f354a08e (#2508) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7bcfc87935..3420c71cb7 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7bcfc87935b7a202002d54e17e140789b02f16e9 +Subproject commit 3420c71cb72f63db8d63164446cca042f354a08e From fd67ca0ab02c3fb22c078d22b332d872523b94f0 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 16 Oct 2024 22:17:28 +0800 Subject: [PATCH 058/157] Update pinned versions for cudf 3420c71cb72f63db8d63164446cca042f354a08e (#2509) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf-pins/versions.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index df743054ff..7be36774f6 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "22668fa1d9ea5918f463c52bcdcb5ef181e5d1d0", + "git_tag" : "a34d6bf039b945cfe4e65993373b28e153abbaa7", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1b70ffdd5ab460ac481f1575c42e8c1fccfda792", + "git_tag" : "de42f5711386f6b914cef0fc54d3081a936c5740", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From e53547bb6b4e4b02a4f28c64f4958a56fa760bf1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 16 Oct 2024 11:14:26 -0500 Subject: [PATCH 059/157] Bump org.apache.hadoop:hadoop-common from 3.2.4 to 3.4.0 (#2432) Bumps org.apache.hadoop:hadoop-common from 3.2.4 to 3.4.0. --- updated-dependencies: - dependency-name: org.apache.hadoop:hadoop-common dependency-type: direct:development ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a50feefb22..641bb25e90 100644 --- a/pom.xml +++ b/pom.xml @@ -94,7 +94,7 @@ ${cuda.version} ${project.basedir}/thirdparty/cudf ${project.basedir}/thirdparty/cudf-pins/ - 3.2.4 + 3.4.0 5.8.1 ${project.build.directory}/libcudf/cmake-build/ false From 252edb89e6f63e00bbce6c17e5e189d396ae4cea Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:19:35 +0800 Subject: [PATCH 060/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2511) * Update submodule cudf to c9202a0797c1b23f02edbdef34d292ebfd74117f Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf c9202a0797c1b23f02edbdef34d292ebfd74117f Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3420c71cb7..c9202a0797 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3420c71cb72f63db8d63164446cca042f354a08e +Subproject commit c9202a0797c1b23f02edbdef34d292ebfd74117f diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index 1073147e4f..f098825f05 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae +61bcb7d39c5aad77100ab5733cbdddf1651dbe11 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 7be36774f6..084117b584 
100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "de42f5711386f6b914cef0fc54d3081a936c5740", + "git_tag" : "50e60a868af05cc9f65b9980753d708e7170f3a1", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 24fafddd0b8aa2590bc735e64f19f9eea8fa9fae Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:51:07 +0800 Subject: [PATCH 061/157] Update submodule cudf to 3683e4685ff0f0bc8122fe654742f708bf9fdbcc (#2512) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index c9202a0797..3683e4685f 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit c9202a0797c1b23f02edbdef34d292ebfd74117f +Subproject commit 3683e4685ff0f0bc8122fe654742f708bf9fdbcc From 6d2c0928156d6d76db3ed57856014de37c434dd4 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Oct 2024 02:55:23 +0800 Subject: [PATCH 062/157] Update submodule cudf to 14209c1962f1615f82f2c5be1cdbf58a6ed05789 (#2513) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3683e4685f..14209c1962 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3683e4685ff0f0bc8122fe654742f708bf9fdbcc +Subproject commit 14209c1962f1615f82f2c5be1cdbf58a6ed05789 From 1beb0c80c3bcf4b6cf187cd2bcb1984939ad53a0 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Oct 2024 07:03:31 +0800 Subject: [PATCH 063/157] Update submodule cudf to 00feb82cbda10bf65343e08d54ed9e893ff4aa71 
(#2514) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 14209c1962..00feb82cbd 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 14209c1962f1615f82f2c5be1cdbf58a6ed05789 +Subproject commit 00feb82cbda10bf65343e08d54ed9e893ff4aa71 From 8a672b6de55566b7b6557b8dc29b73ee21a0b17c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:55:12 +0800 Subject: [PATCH 064/157] Update submodule cudf to ce93c366c451e27a49583cbb809bf5579a4bcf15 (#2515) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 00feb82cbd..ce93c366c4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 00feb82cbda10bf65343e08d54ed9e893ff4aa71 +Subproject commit ce93c366c451e27a49583cbb809bf5579a4bcf15 From 797101fa2a365c706fc96fd3339cab5b5b4c3257 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 18 Oct 2024 16:50:36 +0800 Subject: [PATCH 065/157] Update submodule cudf to b8917229f8a2446c7e5f697475f76743a05e6856 (#2516) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index ce93c366c4..b8917229f8 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit ce93c366c451e27a49583cbb809bf5579a4bcf15 +Subproject commit b8917229f8a2446c7e5f697475f76743a05e6856 From 4765d5c1cbcda4ab132e9331b4746696ece7daae Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:16:22 -0700 Subject: [PATCH 066/157] 
Use `cudf::make_strings_column_batch` in `get_json_object` (#2499) * Testing Signed-off-by: Nghia Truong * Use `make_strings_column_batch` Signed-off-by: Nghia Truong --------- Signed-off-by: Nghia Truong --- src/main/cpp/src/get_json_object.cu | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu index 622a56bc55..8fce46bca4 100644 --- a/src/main/cpp/src/get_json_object.cu +++ b/src/main/cpp/src/get_json_object.cu @@ -1019,7 +1019,6 @@ std::vector> get_json_object_batch( construct_path_commands(json_paths, stream); auto const num_outputs = json_paths.size(); - std::vector> output; // The error check array contains markers denoting if there is any out-of-bound write occurs // (first `num_outputs` elements), or if the nesting depth exceeded its limits (the last element). @@ -1052,19 +1051,23 @@ std::vector> get_json_object_batch( auto d_path_data = cudf::detail::make_device_uvector_async( h_path_data, stream, rmm::mr::get_current_device_resource()); thrust::uninitialized_fill( - rmm::exec_policy(stream), d_error_check.begin(), d_error_check.end(), 0); + rmm::exec_policy_nosync(stream), d_error_check.begin(), d_error_check.end(), 0); kernel_launcher::exec(input, d_path_data, d_max_path_depth_exceeded, stream); auto h_error_check = cudf::detail::make_host_vector_sync(d_error_check, stream); auto has_no_oob = check_error(h_error_check); + std::vector const>> + batch_stringviews; + batch_stringviews.reserve(out_stringviews.size()); + // If we didn't see any out-of-bound write, everything is good so far. // Just gather the output strings and return. 
if (has_no_oob) { for (auto const& out_sview : out_stringviews) { - output.emplace_back(cudf::make_strings_column(out_sview, stream, mr)); + batch_stringviews.emplace_back(out_sview); } - return output; + return cudf::make_strings_column_batch(batch_stringviews, stream, mr); } // From here, we had out-of-bound write. Although this is very rare, it may still happen. @@ -1072,6 +1075,7 @@ std::vector> get_json_object_batch( std::vector, int64_t>> out_offsets_and_sizes; std::vector> out_char_buffers; std::vector oob_indices; + std::vector no_oob_indices; // Check validity from the stored char pointers. auto const validator = [] __device__(thrust::pair const item) { @@ -1085,7 +1089,6 @@ std::vector> get_json_object_batch( if (h_error_check[idx]) { oob_indices.emplace_back(idx); - output.emplace_back(nullptr); // just placeholder. out_null_masks_and_null_counts.emplace_back( cudf::detail::valid_if(out_sview.begin(), out_sview.end(), validator, stream, mr)); @@ -1111,9 +1114,18 @@ std::vector> get_json_object_batch( out_char_buffers.back().data(), d_error_check.data() + idx}); } else { - output.emplace_back(cudf::make_strings_column(out_sview, stream, mr)); + no_oob_indices.emplace_back(idx); + batch_stringviews.emplace_back(out_sview); } } + + std::vector> output(num_outputs); + auto no_oob_output = cudf::make_strings_column_batch(batch_stringviews, stream, mr); + for (std::size_t idx = 0; idx < no_oob_indices.size(); ++idx) { + auto const out_idx = no_oob_indices[idx]; + output[out_idx] = std::move(no_oob_output[idx]); + } + // These buffers are no longer needed. 
scratch_buffers.clear(); out_stringviews.clear(); @@ -1122,7 +1134,7 @@ std::vector> get_json_object_batch( d_path_data = cudf::detail::make_device_uvector_async( h_path_data, stream, rmm::mr::get_current_device_resource()); thrust::uninitialized_fill( - rmm::exec_policy(stream), d_error_check.begin(), d_error_check.end(), 0); + rmm::exec_policy_nosync(stream), d_error_check.begin(), d_error_check.end(), 0); kernel_launcher::exec(input, d_path_data, d_max_path_depth_exceeded, stream); h_error_check = cudf::detail::make_host_vector_sync(d_error_check, stream); has_no_oob = check_error(h_error_check); From 340e27198110bed23d05dcddfde5b488bce10662 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 19 Oct 2024 07:00:49 +0800 Subject: [PATCH 067/157] Update submodule cudf to 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801 (#2517) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index b8917229f8..6ad90742f5 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit b8917229f8a2446c7e5f697475f76743a05e6856 +Subproject commit 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801 From 8913882710abaf7e1df507157abfcf7e19c20ea0 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 19 Oct 2024 10:45:05 +0800 Subject: [PATCH 068/157] Update submodule cudf to 98eef67d12670bd592022201b3c9dcc12374a34a (#2518) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 6ad90742f5..98eef67d12 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801 +Subproject commit 98eef67d12670bd592022201b3c9dcc12374a34a From 
3aa3421a3202523c2f0d742fa5fb3977b6ffe387 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 19 Oct 2024 16:45:32 +0800 Subject: [PATCH 069/157] Update submodule cudf to fdd2b262aa76400d3d57018461eba37892445a4b (#2519) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 98eef67d12..fdd2b262aa 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 98eef67d12670bd592022201b3c9dcc12374a34a +Subproject commit fdd2b262aa76400d3d57018461eba37892445a4b From 5a7c5ce0bb2869a445c044a431f8b0e66c6db9a3 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sun, 20 Oct 2024 06:46:41 +0800 Subject: [PATCH 070/157] Update submodule cudf to 1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2 (#2520) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index fdd2b262aa..1ce2526bde 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit fdd2b262aa76400d3d57018461eba37892445a4b +Subproject commit 1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2 From d0a55aa3f43d43537eaff50b655e7a7e20360a7d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sun, 20 Oct 2024 12:22:06 +0800 Subject: [PATCH 071/157] Update submodule cudf to 074ab749531aa136c546afc7837fec0b404fe022 (#2521) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1ce2526bde..074ab74953 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2 +Subproject commit 074ab749531aa136c546afc7837fec0b404fe022 From ae6b48c6aa9e774fbb9130964d86a71edbc74ea6 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Oct 2024 08:24:12 +0800 Subject: [PATCH 072/157] Update pinned versions for cudf 074ab749531aa136c546afc7837fec0b404fe022 (#2523) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf-pins/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 084117b584..dff6415001 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "a34d6bf039b945cfe4e65993373b28e153abbaa7", + "git_tag" : "f2a056710e6b614cf7dfec17c2e860acd2eddbcc", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 5fe13b1013244659f8f0592994d5014469b5d3ed Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Oct 2024 10:55:57 +0800 Subject: [PATCH 073/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2524) * Update submodule cudf to 69ca3874b97e9cce6efb71e3e33ec598b57908a3 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf 69ca3874b97e9cce6efb71e3e33ec598b57908a3 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 074ab74953..69ca3874b9 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 
074ab749531aa136c546afc7837fec0b404fe022 +Subproject commit 69ca3874b97e9cce6efb71e3e33ec598b57908a3 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index dff6415001..26a1fe58de 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "f2a056710e6b614cf7dfec17c2e860acd2eddbcc", + "git_tag" : "36c5c270990a2fe55f974e7d77bd7b24681629ba", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From a11322db27b585439a178397bad1530ce0c691d0 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:52:24 +0800 Subject: [PATCH 074/157] Update submodule cudf to 637e3206a4656bd38636f3fadf3c4573c7bc906a (#2525) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 69ca3874b9..637e3206a4 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 69ca3874b97e9cce6efb71e3e33ec598b57908a3 +Subproject commit 637e3206a4656bd38636f3fadf3c4573c7bc906a From 75155c498c3b40dd2fd0d42bcb467ccda9573f00 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Oct 2024 00:52:10 +0800 Subject: [PATCH 075/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2526) * Update submodule cudf to 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 
thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 637e3206a4..4fe338c0ef 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 637e3206a4656bd38636f3fadf3c4573c7bc906a +Subproject commit 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 26a1fe58de..2841a245b9 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "50e60a868af05cc9f65b9980753d708e7170f3a1", + "git_tag" : "1024a1250cfde7e93d26dc6d5e063e84c4a39824", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From dffb829212ce4118aecb979ae3c8111274e26c1b Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:52:25 +0800 Subject: [PATCH 076/157] Update pinned versions for cudf 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 (#2527) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index f098825f05..da52b90f5a 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -61bcb7d39c5aad77100ab5733cbdddf1651dbe11 +ab9f5097b8cef743a6a5d1df1b75863054b47464 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 2841a245b9..4315fe9acf 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "36c5c270990a2fe55f974e7d77bd7b24681629ba", + "git_tag" : 
"fcf4b155314184e7f9ce1fa5209ca755a80a4867", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 9c4061a0830be5377267d3bd3fd50d57e2a69c30 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:05:25 +0800 Subject: [PATCH 077/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2528) * Update submodule cudf to cff1296845aa9a4078dd0d95dd30b7e7c004f2d9 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf cff1296845aa9a4078dd0d95dd30b7e7c004f2d9 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/rapids-cmake.sha | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 4fe338c0ef..cff1296845 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 +Subproject commit cff1296845aa9a4078dd0d95dd30b7e7c004f2d9 diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha index da52b90f5a..9aba80689c 100644 --- a/thirdparty/cudf-pins/rapids-cmake.sha +++ b/thirdparty/cudf-pins/rapids-cmake.sha @@ -1 +1 @@ -ab9f5097b8cef743a6a5d1df1b75863054b47464 +e5897d8093393e263ad43d4ecbffe48b6a07ecbb diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 4315fe9acf..9fea155e9c 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "fcf4b155314184e7f9ce1fa5209ca755a80a4867", + "git_tag" : "52b672b6405f6312108263c289c3b042eb0bd50b", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : 
"24.12" }, @@ -60,7 +60,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6", + "git_tag" : "dc0f9fc20c2a544e53099e640a681b347532391a", "git_url" : "https://github.com/NVIDIA/cuCollections.git", "version" : "0.0.1" }, From 156ad0c257e71c11f0bbba83196b809265b9c101 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 23 Oct 2024 22:23:06 +0800 Subject: [PATCH 078/157] Update submodule cudf to 3126f775c527a8df65df2e2cbc8c2b73da2219bf (#2529) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index cff1296845..3126f775c5 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit cff1296845aa9a4078dd0d95dd30b7e7c004f2d9 +Subproject commit 3126f775c527a8df65df2e2cbc8c2b73da2219bf From 7b65899ef8a74536ad44fef0f44b8244c3ad4cde Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Oct 2024 06:50:45 +0800 Subject: [PATCH 079/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2530) * Update submodule cudf to e7653a70743a76ad3c8ca4b377aa0ec4303e5556 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf e7653a70743a76ad3c8ca4b377aa0ec4303e5556 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3126f775c5..e7653a7074 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3126f775c527a8df65df2e2cbc8c2b73da2219bf +Subproject commit 
e7653a70743a76ad3c8ca4b377aa0ec4303e5556 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 9fea155e9c..d96a081e72 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "52b672b6405f6312108263c289c3b042eb0bd50b", + "git_tag" : "7715e36fcd3040f70a5d1edccf28a266bd572fd5", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From f08cedfebc0f7fe54c5c4237ae7b67622a605964 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Oct 2024 10:47:10 +0800 Subject: [PATCH 080/157] Update submodule cudf to d7cdf44da2ba921c6fa63feff8749d141643f76e (#2531) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e7653a7074..d7cdf44da2 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e7653a70743a76ad3c8ca4b377aa0ec4303e5556 +Subproject commit d7cdf44da2ba921c6fa63feff8749d141643f76e From d7b503583e47317a6c4f8c203306773350329cde Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Thu, 24 Oct 2024 16:54:24 +0800 Subject: [PATCH 081/157] Update pinned versions for cudf d7cdf44da2ba921c6fa63feff8749d141643f76e (#2533) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf-pins/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index d96a081e72..228094b53a 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "7715e36fcd3040f70a5d1edccf28a266bd572fd5", + "git_tag" : 
"9b077e51c778e7b05bee27fa52a5ecae62e00bb4", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From d7e66ecba6c6c22a35ac227236b48ef617c059f8 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 25 Oct 2024 02:18:43 +0800 Subject: [PATCH 082/157] Update submodule cudf to 3a623149827ec347e721dd1a18072f18b0b4bcc1 (#2535) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index d7cdf44da2..3a62314982 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit d7cdf44da2ba921c6fa63feff8749d141643f76e +Subproject commit 3a623149827ec347e721dd1a18072f18b0b4bcc1 From 1ba93499e655af155a27dc43e3ba68dd7cf76f56 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 25 Oct 2024 06:46:49 +0800 Subject: [PATCH 083/157] Update submodule cudf to 7115f20e91a314f07333cbd5c01adc62bf2fbb0c (#2536) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3a62314982..7115f20e91 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3a623149827ec347e721dd1a18072f18b0b4bcc1 +Subproject commit 7115f20e91a314f07333cbd5c01adc62bf2fbb0c From 64635ecf7f0fa1d79c1ab4e06f3112bbcac88aae Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:12:40 +0800 Subject: [PATCH 084/157] Update pinned versions for cudf 7115f20e91a314f07333cbd5c01adc62bf2fbb0c (#2537) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf-pins/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 228094b53a..72831a48d8 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "9b077e51c778e7b05bee27fa52a5ecae62e00bb4", + "git_tag" : "dde7115b7a169bcc430b811225ccbac3711d7901", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 6f2b12c11637c1dc2cb789683452fd0d70e3c000 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 26 Oct 2024 00:51:41 +0800 Subject: [PATCH 085/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2538) * Update submodule cudf to e98e6b9209ff8557d85cb9b828b895884b0c7b7a Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf e98e6b9209ff8557d85cb9b828b895884b0c7b7a Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 7115f20e91..e98e6b9209 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 7115f20e91a314f07333cbd5c01adc62bf2fbb0c +Subproject commit e98e6b9209ff8557d85cb9b828b895884b0c7b7a diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index 72831a48d8..bb06629838 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "dde7115b7a169bcc430b811225ccbac3711d7901", + "git_tag" : "40dced5c6b9e3051722fc76554e83f405a462467", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, From 
c56716b420cb005385c632dfd9d5b0f22edefaa2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Sat, 26 Oct 2024 06:46:34 +0800 Subject: [PATCH 086/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2539) * Update submodule cudf to 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316 Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index e98e6b9209..8bc9f19ebb 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit e98e6b9209ff8557d85cb9b828b895884b0c7b7a +Subproject commit 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316 diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index bb06629838..d333cfb05b 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1024a1250cfde7e93d26dc6d5e063e84c4a39824", + "git_tag" : "1ebfe0a4ee5f83a2ad54afcf99716944d20598dd", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 2a04c9f2e06ab722a42f2ec85f9f848d1cb377f5 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 29 Oct 2024 00:51:21 +0800 Subject: [PATCH 087/157] Update submodule cudf to 8c4d1f201043a6802598bea3dcb58fa1e061d9e5 (#2540) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf 
b/thirdparty/cudf index 8bc9f19ebb..8c4d1f2010 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316 +Subproject commit 8c4d1f201043a6802598bea3dcb58fa1e061d9e5 From ed440b96a1734a31c88b1de50ef8e51d9211666d Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 29 Oct 2024 10:46:32 +0800 Subject: [PATCH 088/157] Update submodule cudf to 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f (#2543) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8c4d1f2010..1ad9fc1fee 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8c4d1f201043a6802598bea3dcb58fa1e061d9e5 +Subproject commit 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f From ee0716485ee15f8ad9dda0755806b35055450b5c Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Tue, 29 Oct 2024 16:52:24 +0800 Subject: [PATCH 089/157] Update submodule cudf to bf5b778c265b3bfa712f509be0ba268216bcf3d0 (#2544) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 1ad9fc1fee..bf5b778c26 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f +Subproject commit bf5b778c265b3bfa712f509be0ba268216bcf3d0 From 0f326603ecd19acf4cc13256257a6a9f2f6ec197 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Oct 2024 00:56:04 +0800 Subject: [PATCH 090/157] Update submodule cudf to 3775f7b9f6509bd0f2f75c46edb60abf2522de86 (#2545) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index bf5b778c26..3775f7b9f6 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit bf5b778c265b3bfa712f509be0ba268216bcf3d0 +Subproject commit 3775f7b9f6509bd0f2f75c46edb60abf2522de86 From 02a5b34a037a1fc7be24b2de976303e7a2bca4d0 Mon Sep 17 00:00:00 2001 From: Zach Puller Date: Tue, 29 Oct 2024 12:40:56 -0700 Subject: [PATCH 091/157] fix max bytes dealloc bug (#2541) Signed-off-by: Zach Puller --- src/main/cpp/src/SparkResourceAdaptorJni.cpp | 2 +- .../java/com/nvidia/spark/rapids/jni/RmmSparkTest.java | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/cpp/src/SparkResourceAdaptorJni.cpp b/src/main/cpp/src/SparkResourceAdaptorJni.cpp index 8eeb047ddc..e09ef0dfdb 100644 --- a/src/main/cpp/src/SparkResourceAdaptorJni.cpp +++ b/src/main/cpp/src/SparkResourceAdaptorJni.cpp @@ -1780,6 +1780,7 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource { auto const thread = threads.find(tid); if (thread != threads.end()) { log_status("DEALLOC", tid, thread->second.task_id, thread->second.state); + if (!is_for_cpu) { thread->second.gpu_memory_allocated_bytes -= num_bytes; } } else { log_status("DEALLOC", tid, -2, thread_state::UNKNOWN); } @@ -1802,7 +1803,6 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource { if (is_for_cpu == t_state.is_cpu_alloc) { transition(t_state, thread_state::THREAD_ALLOC_FREE); } - if (!is_for_cpu) { t_state.gpu_memory_allocated_bytes -= num_bytes; } break; default: break; } diff --git a/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java b/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java index 987dd58534..270a4266cd 100644 --- a/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java +++ b/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java @@ -360,7 +360,7 @@ public void testInsertOOMsGpu() { 
assertThrows(GpuSplitAndRetryOOM.class, () -> Rmm.alloc(100).close()); assertEquals(0, RmmSpark.getAndResetNumRetryThrow(taskid)); assertEquals(1, RmmSpark.getAndResetNumSplitRetryThrow(taskid)); - assertEquals(ALIGNMENT * 2, RmmSpark.getAndResetGpuMaxMemoryAllocated(taskid)); + assertEquals(ALIGNMENT, RmmSpark.getAndResetGpuMaxMemoryAllocated(taskid)); // Verify that injecting OOM does not cause the block to actually happen assertEquals(RmmSparkThreadState.THREAD_RUNNING, RmmSpark.getStateOf(threadId)); @@ -818,6 +818,11 @@ public void testBasicMixedBlocking() throws ExecutionException, InterruptedExcep secondGpuAlloc.waitForAlloc(); secondGpuAlloc.freeAndWait(); } + // Do one more alloc after freeing on same task to show the max allocation metric is unimpacted + try (AllocOnAnotherThread secondGpuAlloc = new GpuAllocOnAnotherThread(taskThree, FIVE_MB)) { + secondGpuAlloc.waitForAlloc(); + secondGpuAlloc.freeAndWait(); + } } } } finally { From 47e1738c0936491779dc4a0811a4a905ddee1bc3 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Oct 2024 08:22:40 +0800 Subject: [PATCH 092/157] [submodule-sync] bot-submodule-sync-branch-24.12 to branch-24.12 [skip ci] [bot] (#2546) * Update submodule cudf to 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> * Update pinned versions for cudf 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --------- Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 3775f7b9f6..8d7b0d8bf0 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 3775f7b9f6509bd0f2f75c46edb60abf2522de86 +Subproject commit 
8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json index d333cfb05b..371ab5b422 100644 --- a/thirdparty/cudf-pins/versions.json +++ b/thirdparty/cudf-pins/versions.json @@ -44,7 +44,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "40dced5c6b9e3051722fc76554e83f405a462467", + "git_tag" : "10bc842eb9abb7af272f7d31fbeef310743ad062", "git_url" : "https://github.com/rapidsai/kvikio.git", "version" : "24.12" }, @@ -149,7 +149,7 @@ { "always_download" : true, "git_shallow" : false, - "git_tag" : "1ebfe0a4ee5f83a2ad54afcf99716944d20598dd", + "git_tag" : "47dae24b5578894ac0efc3c06930b7a5a069d988", "git_url" : "https://github.com/rapidsai/rmm.git", "version" : "24.12" }, From 6ccc96f432cf500d397ee3f9c67cc728e9085883 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Wed, 30 Oct 2024 09:19:21 +0800 Subject: [PATCH 093/157] Add utility methods for kudo (#2542) Signed-off-by: liurenjie1024 --- .../com/nvidia/spark/rapids/jni/Arms.java | 84 +++++++++++++++++++ .../com/nvidia/spark/rapids/jni/Pair.java | 42 ++++++++++ .../spark/rapids/jni/Preconditions.java | 42 ++++++++++ 3 files changed, 168 insertions(+) create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Arms.java create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Pair.java create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Arms.java b/src/main/java/com/nvidia/spark/rapids/jni/Arms.java new file mode 100644 index 0000000000..4492711b0c --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/Arms.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Iterator; +import java.util.function.Function; + +/** + * This class contains utility methods for automatic resource management. + */ +class Arms { + /** + * This method close the resource if an exception is thrown while executing the function. + */ + public static T closeIfException(R resource, Function function) { + try { + return function.apply(resource); + } catch (Exception e) { + if (resource != null) { + try { + resource.close(); + } catch (Exception inner) { + e.addSuppressed(inner); + } + } + throw e; + } + } + + /** + * This method safely closes all the resources. + *

+ * This method will iterate through all the resources and closes them. If any exception happened during the + * traversal, exception will be captured and rethrown after all resources closed. + *

+ */ + public static void closeAll(Iterator resources) { + Throwable t = null; + while (resources.hasNext()) { + try { + resources.next().close(); + } catch (Exception e) { + if (t == null) { + t = e; + } else { + t.addSuppressed(e); + } + } + } + + if (t != null) throw new RuntimeException(t); + } + + + /** + * This method safely closes all the resources. See {@link #closeAll(Iterator)} for more details. + */ + public static void closeAll(R... resources) { + closeAll(Arrays.asList(resources)); + } + + /** + * This method safely closes the resources. See {@link #closeAll(Iterator)} for more details. + */ + public static void closeAll(Collection resources) { + closeAll(resources.iterator()); + } +} diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Pair.java b/src/main/java/com/nvidia/spark/rapids/jni/Pair.java new file mode 100644 index 0000000000..8a0b4b0fee --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/Pair.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +/** + * A utility class for holding a pair of values. 
+ */ +class Pair { + private final K left; + private final V right; + + public Pair(K left, V right) { + this.left = left; + this.right = right; + } + + public K getLeft() { + return left; + } + + public V getRight() { + return right; + } + + public static Pair of(K left, V right) { + return new Pair<>(left, right); + } +} diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java b/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java new file mode 100644 index 0000000000..67473a2e61 --- /dev/null +++ b/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.jni; + +import java.util.function.Supplier; + +/** + * This class contains utility methods for checking preconditions. + */ +class Preconditions { + /** + * Check if the condition is true, otherwise throw an IllegalStateException with the given message. + */ + public static void ensure(boolean condition, String message) { + if (!condition) { + throw new IllegalStateException(message); + } + } + + /** + * Check if the condition is true, otherwise throw an IllegalStateException with the given message supplier. 
+ */ + public static void ensure(boolean condition, Supplier messageSupplier) { + if (!condition) { + throw new IllegalStateException(messageSupplier.get()); + } + } +} From d0295590dc98e1246da45963ce6f69b46f44ebda Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Oct 2024 10:53:19 +0800 Subject: [PATCH 094/157] Update submodule cudf to eeb4d2780163794f4b705062e49dbdc3283ebce0 (#2547) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index 8d7b0d8bf0..eeb4d27801 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b +Subproject commit eeb4d2780163794f4b705062e49dbdc3283ebce0 From b0ed7345c111f78c9e734c7108170a7cba8be4f2 Mon Sep 17 00:00:00 2001 From: Jenkins Automation <70000568+nvauto@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:17:11 +0800 Subject: [PATCH 095/157] Update submodule cudf to 6328ad679947eb5cbc352c345a28f079aa6b8005 (#2549) Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com> --- thirdparty/cudf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thirdparty/cudf b/thirdparty/cudf index eeb4d27801..6328ad6799 160000 --- a/thirdparty/cudf +++ b/thirdparty/cudf @@ -1 +1 @@ -Subproject commit eeb4d2780163794f4b705062e49dbdc3283ebce0 +Subproject commit 6328ad679947eb5cbc352c345a28f079aa6b8005 From c8ff5d638c85cd5af23f60abb968dceb0a381818 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 30 Oct 2024 14:06:44 -0500 Subject: [PATCH 096/157] Upmerge to a new version of CUDF with a new version of nvcomp (#2550) Signed-off-by: Robert (Bobby) Evans --- ci/submodule-sync.sh | 8 - patches/noop.patch | 0 patches/revert_nvcomp4.patch | 939 ----------------------------- thirdparty/cudf | 2 +- thirdparty/cudf-pins/versions.json | 12 
+- 5 files changed, 7 insertions(+), 954 deletions(-) create mode 100644 patches/noop.patch delete mode 100644 patches/revert_nvcomp4.patch diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh index ea7c06b7ec..7f3e468862 100755 --- a/ci/submodule-sync.sh +++ b/ci/submodule-sync.sh @@ -91,14 +91,6 @@ rapids_cmake_sha=$(git -C ${LIBCUDF_BUILD_PATH}/_deps/rapids-cmake-src/ rev-pars echo "Update rapids-cmake pinned SHA1 to ${rapids_cmake_sha}" echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha -# Bash the wrong nvcomp version to the correct version until -# nvcomp version mismatch is fixed. https://github.com/rapidsai/cudf/issues/16772. -echo "Revert nvcomp to 3.0.6" -sed -i -e 's/4\.0\.1\.0/3.0.6/' \ - -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz|' \ - -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz|' \ - thirdparty/cudf-pins/versions.json - # Do the git add after the build so that we get # the updated versions.json generated by the build echo "Update cudf submodule to ${cudf_sha} with updated pinned versions" diff --git a/patches/noop.patch b/patches/noop.patch new file mode 100644 index 0000000000..e69de29bb2 diff --git a/patches/revert_nvcomp4.patch b/patches/revert_nvcomp4.patch deleted file mode 100644 index 914c033088..0000000000 --- a/patches/revert_nvcomp4.patch +++ /dev/null @@ -1,939 +0,0 @@ -diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh -index fb93b06dbe..e5565c4b53 100755 ---- a/ci/build_wheel_cudf.sh -+++ 
b/ci/build_wheel_cudf.sh -@@ -23,6 +23,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" - python -m auditwheel repair \ - --exclude libcudf.so \ - --exclude libnvcomp.so \ -+ --exclude libnvcomp_bitcomp.so \ -+ --exclude libnvcomp_gdeflate.so \ - -w ${package_dir}/final_dist \ - ${package_dir}/dist/* - -diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh -index 5e9f7f8a0c..0e4745bda2 100755 ---- a/ci/build_wheel_pylibcudf.sh -+++ b/ci/build_wheel_pylibcudf.sh -@@ -21,6 +21,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt" - python -m auditwheel repair \ - --exclude libcudf.so \ - --exclude libnvcomp.so \ -+ --exclude libnvcomp_bitcomp.so \ -+ --exclude libnvcomp_gdeflate.so \ - -w ${package_dir}/final_dist \ - ${package_dir}/dist/* - -diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml -index bd5e6c3d56..74ca3fda1a 100644 ---- a/conda/environments/all_cuda-118_arch-x86_64.yaml -+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml -@@ -58,7 +58,7 @@ dependencies: - - numpy>=1.23,<3.0a0 - - numpydoc - - nvcc_linux-64=11.8 --- nvcomp==4.0.1 -+- nvcomp==3.0.6 - - nvtx>=0.2.1 - - openpyxl - - packaging -diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml -index 565a3ebfa3..22619acf4a 100644 ---- a/conda/environments/all_cuda-125_arch-x86_64.yaml -+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml -@@ -56,7 +56,7 @@ dependencies: - - numba-cuda>=0.0.13 - - numpy>=1.23,<3.0a0 - - numpydoc --- nvcomp==4.0.1 -+- nvcomp==3.0.6 - - nvtx>=0.2.1 - - openpyxl - - packaging -@@ -67,7 +67,7 @@ dependencies: - - pre-commit - - pyarrow>=14.0.0,<18.0.0a0 - - pydata-sphinx-theme!=0.14.2 --- pynvjitlink>=0.0.0a0 -+- pynvjitlink - - pytest-benchmark - - pytest-cases>=3.8.2 - - pytest-cov -diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml -index dc75eb4b25..67d501d746 100644 ---- 
a/conda/recipes/libcudf/conda_build_config.yaml -+++ b/conda/recipes/libcudf/conda_build_config.yaml -@@ -35,7 +35,7 @@ spdlog_version: - - ">=1.14.1,<1.15" - - nvcomp_version: -- - "=4.0.1" -+ - "=3.0.6" - - zlib_version: - - ">=1.2.13" -diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp -index 0d74a4158a..f3260d0cb5 100644 ---- a/cpp/include/cudf/io/nvcomp_adapter.hpp -+++ b/cpp/include/cudf/io/nvcomp_adapter.hpp -@@ -24,7 +24,7 @@ - namespace CUDF_EXPORT cudf { - namespace io::nvcomp { - --enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP }; -+enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; - - /** - * @brief Set of parameters that impact whether nvCOMP features are enabled. -@@ -36,20 +36,33 @@ struct feature_status_parameters { - int lib_patch_version; ///< patch version - bool are_all_integrations_enabled; ///< all integrations - bool are_stable_integrations_enabled; ///< stable integrations -+ int compute_capability_major; ///< cuda compute major version - - /** -- * @brief Default constructor using the current version of nvcomp and current environment -- * variables -+ * @brief Default Constructor - */ - feature_status_parameters(); - - /** -- * @brief Constructor using the current version of nvcomp -+ * @brief feature_status_parameters Constructor - * -+ * @param major positive integer representing major value of nvcomp -+ * @param minor positive integer representing minor value of nvcomp -+ * @param patch positive integer representing patch value of nvcomp - * @param all_enabled if all integrations are enabled - * @param stable_enabled if stable integrations are enabled -+ * @param cc_major CUDA compute capability - */ -- feature_status_parameters(bool all_enabled, bool stable_enabled); -+ feature_status_parameters( -+ int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) -+ : lib_major_version{major}, -+ lib_minor_version{minor}, -+ 
lib_patch_version{patch}, -+ are_all_integrations_enabled{all_enabled}, -+ are_stable_integrations_enabled{stable_enabled}, -+ compute_capability_major{cc_major} -+ { -+ } - }; - - /** -@@ -61,7 +74,8 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para - lhs.lib_minor_version == rhs.lib_minor_version and - lhs.lib_patch_version == rhs.lib_patch_version and - lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and -- lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled; -+ lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and -+ lhs.compute_capability_major == rhs.compute_capability_major; - } - - /** -diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp -index c3187f73a9..3191e8f015 100644 ---- a/cpp/src/io/comp/nvcomp_adapter.cpp -+++ b/cpp/src/io/comp/nvcomp_adapter.cpp -@@ -22,46 +22,94 @@ - #include - #include - --#include --#include - #include - #include --#include - - #include - -+#define NVCOMP_DEFLATE_HEADER -+#if __has_include(NVCOMP_DEFLATE_HEADER) -+#include NVCOMP_DEFLATE_HEADER -+#endif -+ -+#define NVCOMP_ZSTD_HEADER -+#if __has_include(NVCOMP_ZSTD_HEADER) -+#include NVCOMP_ZSTD_HEADER -+#endif -+ -+// When building with nvcomp 4.0 or newer, map the new version macros to the old ones -+#ifndef NVCOMP_MAJOR_VERSION -+#define NVCOMP_MAJOR_VERSION NVCOMP_VER_MAJOR -+#define NVCOMP_MINOR_VERSION NVCOMP_VER_MINOR -+#define NVCOMP_PATCH_VERSION NVCOMP_VER_PATCH -+#endif -+ -+#define NVCOMP_HAS_ZSTD_DECOMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 3)) -+ -+#define NVCOMP_HAS_ZSTD_COMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 4)) -+ -+#define NVCOMP_HAS_DEFLATE(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 5)) -+ -+#define NVCOMP_HAS_DECOMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) \ -+ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 1)) -+ -+#define 
NVCOMP_HAS_COMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 6)) -+ -+// ZSTD is stable for nvcomp 2.3.2 or newer -+#define NVCOMP_ZSTD_DECOMP_IS_STABLE(MAJOR, MINOR, PATCH) \ -+ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 2)) -+ - namespace cudf::io::nvcomp { - - // Dispatcher for nvcompBatchedDecompressGetTempSizeEx - template --auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args) -+std::optional batched_decompress_get_temp_size_ex(compression_type compression, -+ Args&&... args) - { -+#if NVCOMP_HAS_DECOMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - switch (compression) { - case compression_type::SNAPPY: - return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward(args)...); - case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward(args)...); -+#else -+ return std::nullopt; -+#endif - case compression_type::LZ4: - return nvcompBatchedLZ4DecompressGetTempSizeEx(std::forward(args)...); -- case compression_type::DEFLATE: -- return nvcompBatchedDeflateDecompressGetTempSizeEx(std::forward(args)...); -- case compression_type::GZIP: -- return nvcompBatchedGzipDecompressGetTempSizeEx(std::forward(args)...); -- default: CUDF_FAIL("Unsupported compression type"); -+ case compression_type::DEFLATE: [[fallthrough]]; -+ default: return std::nullopt; - } -+#endif -+ return std::nullopt; - } --size_t batched_decompress_temp_size(compression_type compression, -- size_t num_chunks, -- size_t max_uncomp_chunk_size, -- size_t max_total_uncomp_size) --{ -- size_t temp_size = 0; -- nvcompStatus_t nvcomp_status = batched_decompress_get_temp_size_ex( -- compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); - -- CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, -- "Unable to get 
scratch size for decompression"); -- return temp_size; -+// Dispatcher for nvcompBatchedDecompressGetTempSize -+template -+auto batched_decompress_get_temp_size(compression_type compression, Args&&... args) -+{ -+ switch (compression) { -+ case compression_type::SNAPPY: -+ return nvcompBatchedSnappyDecompressGetTempSize(std::forward(args)...); -+ case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ return nvcompBatchedZstdDecompressGetTempSize(std::forward(args)...); -+#else -+ CUDF_FAIL("Decompression error: " + -+ nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif -+ case compression_type::DEFLATE: -+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ return nvcompBatchedDeflateDecompressGetTempSize(std::forward(args)...); -+#else -+ CUDF_FAIL("Decompression error: " + -+ nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); -+#endif -+ case compression_type::LZ4: -+ return nvcompBatchedLZ4DecompressGetTempSize(std::forward(args)...); -+ default: CUDF_FAIL("Unsupported compression type"); -+ } - } - - // Dispatcher for nvcompBatchedDecompressAsync -@@ -72,12 +120,20 @@ auto batched_decompress_async(compression_type compression, Args&&... 
args) - case compression_type::SNAPPY: - return nvcompBatchedSnappyDecompressAsync(std::forward(args)...); - case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompBatchedZstdDecompressAsync(std::forward(args)...); -+#else -+ CUDF_FAIL("Decompression error: " + -+ nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif - case compression_type::DEFLATE: -+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompBatchedDeflateDecompressAsync(std::forward(args)...); -+#else -+ CUDF_FAIL("Decompression error: " + -+ nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); -+#endif - case compression_type::LZ4: return nvcompBatchedLZ4DecompressAsync(std::forward(args)...); -- case compression_type::GZIP: -- return nvcompBatchedGzipDecompressAsync(std::forward(args)...); - default: CUDF_FAIL("Unsupported compression type"); - } - } -@@ -89,11 +145,31 @@ std::string compression_type_name(compression_type compression) - case compression_type::ZSTD: return "Zstandard"; - case compression_type::DEFLATE: return "Deflate"; - case compression_type::LZ4: return "LZ4"; -- case compression_type::GZIP: return "GZIP"; - } - return "compression_type(" + std::to_string(static_cast(compression)) + ")"; - } - -+size_t batched_decompress_temp_size(compression_type compression, -+ size_t num_chunks, -+ size_t max_uncomp_chunk_size, -+ size_t max_total_uncomp_size) -+{ -+ size_t temp_size = 0; -+ auto nvcomp_status = batched_decompress_get_temp_size_ex( -+ compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); -+ -+ if (nvcomp_status.value_or(nvcompStatus_t::nvcompErrorInternal) != -+ nvcompStatus_t::nvcompSuccess) { -+ nvcomp_status = -+ batched_decompress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size, &temp_size); -+ } -+ -+ CUDF_EXPECTS(nvcomp_status == 
nvcompStatus_t::nvcompSuccess, -+ "Unable to get scratch size for decompression"); -+ -+ return temp_size; -+} -+ - void batched_decompress(compression_type compression, - device_span const> inputs, - device_span const> outputs, -@@ -128,10 +204,54 @@ void batched_decompress(compression_type compression, - update_compression_results(nvcomp_statuses, actual_uncompressed_data_sizes, results, stream); - } - --size_t batched_compress_temp_size(compression_type compression, -- size_t batch_size, -- size_t max_uncompressed_chunk_bytes, -- size_t max_total_uncompressed_bytes) -+// Wrapper for nvcompBatchedCompressGetTempSize -+auto batched_compress_get_temp_size(compression_type compression, -+ size_t batch_size, -+ size_t max_uncompressed_chunk_bytes) -+{ -+ size_t temp_size = 0; -+ nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; -+ switch (compression) { -+ case compression_type::SNAPPY: -+ nvcomp_status = nvcompBatchedSnappyCompressGetTempSize( -+ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size); -+ break; -+ case compression_type::DEFLATE: -+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ nvcomp_status = nvcompBatchedDeflateCompressGetTempSize( -+ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size); -+ break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -+#endif -+ case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ nvcomp_status = nvcompBatchedZstdCompressGetTempSize( -+ batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size); -+ break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif -+ case compression_type::LZ4: -+ nvcomp_status = nvcompBatchedLZ4CompressGetTempSize( -+ 
batch_size, max_uncompressed_chunk_bytes, nvcompBatchedLZ4DefaultOpts, &temp_size); -+ break; -+ default: CUDF_FAIL("Unsupported compression type"); -+ } -+ -+ CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, -+ "Unable to get scratch size for compression"); -+ return temp_size; -+} -+ -+#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+// Wrapper for nvcompBatchedCompressGetTempSizeEx -+auto batched_compress_get_temp_size_ex(compression_type compression, -+ size_t batch_size, -+ size_t max_uncompressed_chunk_bytes, -+ size_t max_total_uncompressed_bytes) - { - size_t temp_size = 0; - nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; -@@ -171,8 +291,28 @@ size_t batched_compress_temp_size(compression_type compression, - "Unable to get scratch size for compression"); - return temp_size; - } -+#endif -+ -+size_t batched_compress_temp_size(compression_type compression, -+ size_t num_chunks, -+ size_t max_uncomp_chunk_size, -+ size_t max_total_uncomp_size) -+{ -+#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ try { -+ return batched_compress_get_temp_size_ex( -+ compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); -+ } catch (...) 
{ -+ // Ignore errors in the expanded version; fall back to the old API in case of failure -+ CUDF_LOG_WARN( -+ "CompressGetTempSizeEx call failed, falling back to CompressGetTempSize; this may increase " -+ "the memory usage"); -+ } -+#endif -+ -+ return batched_compress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size); -+} - --// Wrapper for nvcompBatchedCompressGetMaxOutputChunkSize - size_t compress_max_output_chunk_size(compression_type compression, - uint32_t max_uncompressed_chunk_bytes) - { -@@ -188,13 +328,23 @@ size_t compress_max_output_chunk_size(compression_type compression, - capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size); - break; - case compression_type::DEFLATE: -+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize( - capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size); - break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -+#endif - case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - status = nvcompBatchedZstdCompressGetMaxOutputChunkSize( - capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size); - break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif - case compression_type::LZ4: - status = nvcompBatchedLZ4CompressGetMaxOutputChunkSize( - capped_uncomp_bytes, nvcompBatchedLZ4DefaultOpts, &max_comp_chunk_size); -@@ -234,6 +384,7 @@ static void batched_compress_async(compression_type compression, - stream.value()); - break; - case compression_type::DEFLATE: -+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs, - 
device_uncompressed_bytes, - max_uncompressed_chunk_bytes, -@@ -245,7 +396,12 @@ static void batched_compress_async(compression_type compression, - nvcompBatchedDeflateDefaultOpts, - stream.value()); - break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -+#endif - case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs, - device_uncompressed_bytes, - max_uncompressed_chunk_bytes, -@@ -257,6 +413,10 @@ static void batched_compress_async(compression_type compression, - nvcompBatchedZstdDefaultOpts, - stream.value()); - break; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif - case compression_type::LZ4: - nvcomp_status = nvcompBatchedLZ4CompressAsync(device_uncompressed_ptrs, - device_uncompressed_bytes, -@@ -318,18 +478,16 @@ void batched_compress(compression_type compression, - } - - feature_status_parameters::feature_status_parameters() -- : feature_status_parameters(nvcomp_integration::is_all_enabled(), -- nvcomp_integration::is_stable_enabled()) --{ --} -- --feature_status_parameters::feature_status_parameters(bool all_enabled, bool stable_enabled) -- : lib_major_version{NVCOMP_VER_MAJOR}, -- lib_minor_version{NVCOMP_VER_MINOR}, -- lib_patch_version{NVCOMP_VER_PATCH}, -- are_all_integrations_enabled{all_enabled}, -- are_stable_integrations_enabled{stable_enabled} -+ : lib_major_version{NVCOMP_MAJOR_VERSION}, -+ lib_minor_version{NVCOMP_MINOR_VERSION}, -+ lib_patch_version{NVCOMP_PATCH_VERSION}, -+ are_all_integrations_enabled{nvcomp_integration::is_all_enabled()}, -+ are_stable_integrations_enabled{nvcomp_integration::is_stable_enabled()} - { -+ int device; -+ CUDF_CUDA_TRY(cudaGetDevice(&device)); -+ CUDF_CUDA_TRY( -+ 
cudaDeviceGetAttribute(&compute_capability_major, cudaDevAttrComputeCapabilityMajor, device)); - } - - // Represents all parameters required to determine status of a compression/decompression feature -@@ -352,21 +510,42 @@ std::optional is_compression_disabled_impl(compression_type compres - { - switch (compression) { - case compression_type::DEFLATE: { -+ if (not NVCOMP_HAS_DEFLATE( -+ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { -+ return "nvCOMP 2.5 or newer is required for Deflate compression"; -+ } - if (not params.are_all_integrations_enabled) { - return "DEFLATE compression is experimental, you can enable it through " - "`LIBCUDF_NVCOMP_POLICY` environment variable."; - } - return std::nullopt; - } -+ case compression_type::SNAPPY: { -+ if (not params.are_stable_integrations_enabled) { -+ return "Snappy compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " -+ "environment variable."; -+ } -+ return std::nullopt; -+ } -+ case compression_type::ZSTD: { -+ if (not NVCOMP_HAS_ZSTD_COMP( -+ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { -+ return "nvCOMP 2.4 or newer is required for Zstandard compression"; -+ } -+ if (not params.are_stable_integrations_enabled) { -+ return "Zstandard compression is experimental, you can enable it through " -+ "`LIBCUDF_NVCOMP_POLICY` environment variable."; -+ } -+ return std::nullopt; -+ } - case compression_type::LZ4: -- case compression_type::SNAPPY: -- case compression_type::ZSTD: - if (not params.are_stable_integrations_enabled) { -- return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; -+ return "LZ4 compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " -+ "environment variable."; - } - return std::nullopt; -- default: return "Unsupported compression type"; - } -+ return "Unsupported compression type"; - } - - std::optional is_compression_disabled(compression_type compression, -@@ -398,26 
+577,58 @@ std::optional is_compression_disabled(compression_type compression, - return reason; - } - -+std::optional is_zstd_decomp_disabled(feature_status_parameters const& params) -+{ -+ if (not NVCOMP_HAS_ZSTD_DECOMP( -+ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { -+ return "nvCOMP 2.3 or newer is required for Zstandard decompression"; -+ } -+ -+ if (NVCOMP_ZSTD_DECOMP_IS_STABLE( -+ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { -+ if (not params.are_stable_integrations_enabled) { -+ return "Zstandard decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " -+ "environment variable."; -+ } -+ } else if (not params.are_all_integrations_enabled) { -+ return "Zstandard decompression is experimental, you can enable it through " -+ "`LIBCUDF_NVCOMP_POLICY` environment variable."; -+ } -+ -+ return std::nullopt; -+} -+ - std::optional is_decompression_disabled_impl(compression_type compression, - feature_status_parameters params) - { - switch (compression) { -- case compression_type::DEFLATE: -- case compression_type::GZIP: { -+ case compression_type::DEFLATE: { -+ if (not NVCOMP_HAS_DEFLATE( -+ params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { -+ return "nvCOMP 2.5 or newer is required for Deflate decompression"; -+ } - if (not params.are_all_integrations_enabled) { - return "DEFLATE decompression is experimental, you can enable it through " - "`LIBCUDF_NVCOMP_POLICY` environment variable."; - } - return std::nullopt; - } -- case compression_type::LZ4: -- case compression_type::SNAPPY: -- case compression_type::ZSTD: { -+ case compression_type::SNAPPY: { - if (not params.are_stable_integrations_enabled) { -- return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; -+ return "Snappy decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " -+ "environment variable."; - } - return std::nullopt; - } -+ case 
compression_type::ZSTD: return is_zstd_decomp_disabled(params); -+ case compression_type::LZ4: { -+ if (not params.are_stable_integrations_enabled) { -+ return "LZ4 decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " -+ "environment variable."; -+ } -+ return std::nullopt; -+ } -+ default: return "Unsupported compression type"; - } - return "Unsupported compression type"; - } -@@ -451,27 +662,43 @@ std::optional is_decompression_disabled(compression_type compressio - return reason; - } - --size_t required_alignment(compression_type compression) -+size_t compress_input_alignment_bits(compression_type compression) - { - switch (compression) { -- case compression_type::GZIP: -- case compression_type::DEFLATE: return nvcompDeflateRequiredAlignment; -- case compression_type::SNAPPY: return nvcompSnappyRequiredAlignment; -- case compression_type::ZSTD: return nvcompZstdRequiredAlignment; -- case compression_type::LZ4: return nvcompLZ4RequiredAlignment; -+ case compression_type::DEFLATE: return 0; -+ case compression_type::SNAPPY: return 0; -+ case compression_type::ZSTD: return 2; -+ case compression_type::LZ4: return 2; - default: CUDF_FAIL("Unsupported compression type"); - } - } - --std::optional compress_max_allowed_chunk_size(compression_type compression) -+size_t compress_output_alignment_bits(compression_type compression) - { - switch (compression) { -- case compression_type::DEFLATE: return nvcompDeflateCompressionMaxAllowedChunkSize; -- case compression_type::SNAPPY: return nvcompSnappyCompressionMaxAllowedChunkSize; -- case compression_type::ZSTD: return nvcompZstdCompressionMaxAllowedChunkSize; -- case compression_type::LZ4: return nvcompLZ4CompressionMaxAllowedChunkSize; -+ case compression_type::DEFLATE: return 3; -+ case compression_type::SNAPPY: return 0; -+ case compression_type::ZSTD: return 0; -+ case compression_type::LZ4: return 2; - default: CUDF_FAIL("Unsupported compression type"); - } - } - -+std::optional 
compress_max_allowed_chunk_size(compression_type compression) -+{ -+ switch (compression) { -+ case compression_type::DEFLATE: return 64 * 1024; -+ case compression_type::SNAPPY: return std::nullopt; -+ case compression_type::ZSTD: -+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -+ return nvcompZstdCompressionMaxAllowedChunkSize; -+#else -+ CUDF_FAIL("Compression error: " + -+ nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -+#endif -+ case compression_type::LZ4: return 16 * 1024 * 1024; -+ default: return std::nullopt; -+ } -+} -+ - } // namespace cudf::io::nvcomp -diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp -index 583bd6a352..43c79e3237 100644 ---- a/cpp/src/io/comp/nvcomp_adapter.hpp -+++ b/cpp/src/io/comp/nvcomp_adapter.hpp -@@ -75,12 +75,20 @@ size_t batched_decompress_temp_size(compression_type compression, - uint32_t max_uncomp_chunk_size); - - /** -- * @brief Gets input and output alignment requirements for the given compression type. -+ * @brief Gets input alignment requirements for the given compression type. - * - * @param compression Compression type -- * @returns required alignment -+ * @returns required alignment, in bits - */ --[[nodiscard]] size_t required_alignment(compression_type compression); -+[[nodiscard]] size_t compress_input_alignment_bits(compression_type compression); -+ -+/** -+ * @brief Gets output alignment requirements for the given compression type. -+ * -+ * @param compression Compression type -+ * @returns required alignment, in bits -+ */ -+[[nodiscard]] size_t compress_output_alignment_bits(compression_type compression); - - /** - * @brief Maximum size of uncompressed chunks that can be compressed with nvCOMP. 
-diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu -index 60a64fb0ee..40cfbe763b 100644 ---- a/cpp/src/io/orc/writer_impl.cu -+++ b/cpp/src/io/orc/writer_impl.cu -@@ -533,20 +533,20 @@ auto uncomp_block_alignment(CompressionKind compression_kind) - { - if (compression_kind == NONE or - nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { -- return 1ul; -+ return 1u; - } - -- return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); -+ return 1u << nvcomp::compress_input_alignment_bits(to_nvcomp_compression_type(compression_kind)); - } - - auto comp_block_alignment(CompressionKind compression_kind) - { - if (compression_kind == NONE or - nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { -- return 1ul; -+ return 1u; - } - -- return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); -+ return 1u << nvcomp::compress_output_alignment_bits(to_nvcomp_compression_type(compression_kind)); - } - - /** -diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu -index 27312a4da8..779d40281b 100644 ---- a/cpp/src/io/parquet/reader_impl_chunking.cu -+++ b/cpp/src/io/parquet/reader_impl_chunking.cu -@@ -865,18 +865,8 @@ std::vector compute_page_splits_by_row(device_span=0.0.0a0 -+ - &pynvjitlink_unsuffixed pynvjitlink - - matrix: {cuda: "11.*"} - packages: - - &cubinlinker_unsuffixed cubinlinker -@@ -676,7 +676,7 @@ dependencies: - cuda: "12.*" - cuda_suffixed: "true" - packages: -- - pynvjitlink-cu12>=0.0.0a0 -+ - pynvjitlink-cu12 - - matrix: - cuda: "12.*" - cuda_suffixed: "false" -diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md -index 97b961b455..adcdaa51e7 100644 ---- a/docs/cudf/source/user_guide/io/io.md -+++ b/docs/cudf/source/user_guide/io/io.md -@@ -75,6 +75,7 @@ IO format. - - - -+ - **Notes:** - - - \[¹\] - Not all orientations are GPU-accelerated. 
-@@ -176,9 +177,4 @@ If no value is set, behavior will be the same as the "STABLE" option. - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ - | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ | - +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ -- | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | -- +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ -- | GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | -- +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ -- - ``` -diff --git a/java/pom.xml b/java/pom.xml -index 450cfbdbc8..55cb055398 100644 ---- a/java/pom.xml -+++ b/java/pom.xml -@@ -1,6 +1,6 @@ - -