const& input,
+ cudf::column_view const& is_null,
+ rmm::cuda_stream_view stream = cudf::get_default_stream(),
+ rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
+
} // namespace spark_rapids_jni
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
index 3a7c4a6a53..1a41e58613 100644
--- a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
+++ b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
@@ -160,6 +160,65 @@ public static ColumnVector extractRawMapFromJsonString(ColumnView input) {
return new ColumnVector(extractRawMapFromJsonString(input.getNativeView()));
}
+ /**
+ * A class to hold the result when concatenating JSON strings.
+ *
+ * Along with the concatenated data, the result also contains a vector that indicates
+ * whether each row in the input is null or empty, and the delimiter used for concatenation.
+ */
+ public static class ConcatenatedJson implements AutoCloseable {
+ public final ColumnVector isNullOrEmpty;
+ public final DeviceMemoryBuffer data;
+ public final char delimiter;
+
+ public ConcatenatedJson(ColumnVector isNullOrEmpty, DeviceMemoryBuffer data, char delimiter) {
+ this.isNullOrEmpty = isNullOrEmpty;
+ this.data = data;
+ this.delimiter = delimiter;
+ }
+
+ @Override
+ public void close() {
+ isNullOrEmpty.close();
+ data.close();
+ }
+ }
+
+ /**
+ * Concatenate JSON strings in the input column into a single JSON string.
+ *
+ * During concatenation, the function also generates a boolean vector that indicates whether
+ * each row in the input is null or empty. The delimiter used for concatenation is also returned.
+ *
+ * @param input The input strings column to concatenate
+ * @return A {@link ConcatenatedJson} object that contains the concatenated output
+ */
+ public static ConcatenatedJson concatenateJsonStrings(ColumnView input) {
+ assert (input.getType().equals(DType.STRING)) : "Input must be of STRING type";
+ long[] concatenated = concatenateJsonStrings(input.getNativeView());
+ return new ConcatenatedJson(new ColumnVector(concatenated[0]),
+ DeviceMemoryBuffer.fromRmm(concatenated[1], concatenated[2], concatenated[3]),
+ (char) concatenated[4]);
+ }
+
+ /**
+ * Create a structs column from the given children columns and a boolean column specifying
+ * the rows at which the output column should be null.
+ *
+ * Note that the children columns are expected to have null rows at the same positions indicated
+ * by the input isNull column.
+ *
+ * @param children The children columns of the output structs column
+ * @param isNull A boolean column specifying the rows at which the output column should be null
+ * @return A structs column created from the given children and the isNull column
+ */
+ public static ColumnVector makeStructs(ColumnView[] children, ColumnView isNull) {
+ long[] handles = new long[children.length];
+ for (int i = 0; i < children.length; i++) {
+ handles[i] = children[i].getNativeView();
+ }
+ return new ColumnVector(makeStructs(handles, isNull.getNativeView()));
+ }
private static native int getMaxJSONPathDepth();
@@ -178,4 +237,8 @@ private static native long[] getJsonObjectMultiplePaths(long input,
private static native long extractRawMapFromJsonString(long input);
+
+ private static native long[] concatenateJsonStrings(long input);
+
+ private static native long makeStructs(long[] children, long isNull);
}
From 42dd8987acb183ffcc534a987ae29b202bc01c39 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 11 Oct 2024 10:51:06 +0800
Subject: [PATCH 045/157] Update submodule cudf to
1436cac9de8b450a32e71d5b779503e9a29edaa6 (#2489)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 69b0f661ff..1436cac9de 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 69b0f661ff2fc4c12bb0fe696e556f6b3224b381
+Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index 5d73ae95b4..1073147e4f 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-fe3362fa774195a3d434d6835416672e1d46555e
+27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae
From 6314455a5905d14bb37e94e8f1a669ce6c850ad9 Mon Sep 17 00:00:00 2001
From: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 11 Oct 2024 03:00:55 +0000
Subject: [PATCH 046/157] Auto-merge use branch-24.12 versions
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 24 ++++++++++++++++--------
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 319a53327a..1436cac9de 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 319a53327ac7c921a78979a1f23c5caf7171129d
+Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index 37820d8ad4..1073147e4f 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-312909127cf0fe96e178f0ffa754908f58d489a3
+27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index ed40c777a4..a93318b4a8 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,9 +44,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f",
+ "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed",
"git_url" : "https://github.com/rapidsai/kvikio.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"bs_thread_pool" :
{
@@ -60,7 +60,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c",
+ "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6",
"git_url" : "https://github.com/NVIDIA/cuCollections.git",
"version" : "0.0.1"
},
@@ -109,6 +109,14 @@
"git_shallow" : false,
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+ "patches" :
+ [
+ {
+ "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+ "fixed_in" : "",
+ "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537"
+ }
+ ],
"version" : "0.6.0.dev"
},
"nvcomp" :
@@ -119,15 +127,15 @@
"git_url" : "https://github.com/NVIDIA/nvcomp.git",
"proprietary_binary" :
{
- "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz",
- "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz"
+ "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz",
+ "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz"
},
"proprietary_binary_cuda_version_mapping" :
{
"11" : "11.x",
"12" : "12.x"
},
- "version" : "3.0.6"
+ "version" : "4.0.1"
},
"nvtx3" :
{
@@ -141,9 +149,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e",
+ "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b",
"git_url" : "https://github.com/rapidsai/rmm.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"spdlog" :
{
From a3132f5e6fdcaadc8216acc03851e17f38ffedfb Mon Sep 17 00:00:00 2001
From: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 11 Oct 2024 04:30:58 +0000
Subject: [PATCH 047/157] Auto-merge use branch-24.12 versions
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 24 ++++++++++++++++--------
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 319a53327a..1436cac9de 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 319a53327ac7c921a78979a1f23c5caf7171129d
+Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index 37820d8ad4..1073147e4f 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-312909127cf0fe96e178f0ffa754908f58d489a3
+27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index ed40c777a4..a93318b4a8 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,9 +44,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f",
+ "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed",
"git_url" : "https://github.com/rapidsai/kvikio.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"bs_thread_pool" :
{
@@ -60,7 +60,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c",
+ "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6",
"git_url" : "https://github.com/NVIDIA/cuCollections.git",
"version" : "0.0.1"
},
@@ -109,6 +109,14 @@
"git_shallow" : false,
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+ "patches" :
+ [
+ {
+ "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+ "fixed_in" : "",
+ "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537"
+ }
+ ],
"version" : "0.6.0.dev"
},
"nvcomp" :
@@ -119,15 +127,15 @@
"git_url" : "https://github.com/NVIDIA/nvcomp.git",
"proprietary_binary" :
{
- "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz",
- "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz"
+ "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz",
+ "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz"
},
"proprietary_binary_cuda_version_mapping" :
{
"11" : "11.x",
"12" : "12.x"
},
- "version" : "3.0.6"
+ "version" : "4.0.1"
},
"nvtx3" :
{
@@ -141,9 +149,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e",
+ "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b",
"git_url" : "https://github.com/rapidsai/rmm.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"spdlog" :
{
From 2c85286887727c82a5de8ab2f56ebe49e7767077 Mon Sep 17 00:00:00 2001
From: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 11 Oct 2024 06:28:34 +0000
Subject: [PATCH 048/157] Auto-merge use branch-24.12 versions
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 24 ++++++++++++++++--------
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 7b0adfa253..1436cac9de 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 7b0adfa2533e4792464230ee67916a04ce06caf6
+Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index 37820d8ad4..1073147e4f 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-312909127cf0fe96e178f0ffa754908f58d489a3
+27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index ed40c777a4..a93318b4a8 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,9 +44,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1b85263eba89c0f077fbb3da90a770b84161d20f",
+ "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed",
"git_url" : "https://github.com/rapidsai/kvikio.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"bs_thread_pool" :
{
@@ -60,7 +60,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "d3477661d771e0d6fd22259bf6dd6f8c64a7401c",
+ "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6",
"git_url" : "https://github.com/NVIDIA/cuCollections.git",
"version" : "0.0.1"
},
@@ -109,6 +109,14 @@
"git_shallow" : false,
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+ "patches" :
+ [
+ {
+ "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+ "fixed_in" : "",
+ "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537"
+ }
+ ],
"version" : "0.6.0.dev"
},
"nvcomp" :
@@ -119,15 +127,15 @@
"git_url" : "https://github.com/NVIDIA/nvcomp.git",
"proprietary_binary" :
{
- "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz",
- "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz"
+ "aarch64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz",
+ "x86_64-linux" : "https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz"
},
"proprietary_binary_cuda_version_mapping" :
{
"11" : "11.x",
"12" : "12.x"
},
- "version" : "3.0.6"
+ "version" : "4.0.1"
},
"nvtx3" :
{
@@ -141,9 +149,9 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "afe0a3336397b17a96bb703e82f3b6365ee7c41e",
+ "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b",
"git_url" : "https://github.com/rapidsai/rmm.git",
- "version" : "24.10"
+ "version" : "24.12"
},
"spdlog" :
{
From 024b9c6f38b1fd0a221bfedf865ee1dbbd2e81bf Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Fri, 11 Oct 2024 14:09:29 -0700
Subject: [PATCH 049/157] Avoid parsing field name twice when matching named
instruction in `get_json_object` kernel (#2471)
* Temporarily add back benchmark code
Signed-off-by: Nghia Truong
* Optimize stack data
Signed-off-by: Nghia Truong
* Revert "Optimize stack data"
This reverts commit 59ac5bf0d494e17fbf9b2a7f4806abe057a65c45.
* Reorganize code
Signed-off-by: Nghia Truong
* Perform name matching when parsing name field
Signed-off-by: Nghia Truong
* Optimize write
Signed-off-by: Nghia Truong
* Cleanup
Signed-off-by: Nghia Truong
* Simplify `char_range_reader`
Signed-off-by: Nghia Truong
* Try to reduce stack data size
Signed-off-by: Nghia Truong
* Revert "Try to reduce stack data size"
This reverts commit a8d563c033f7c4eb20c3055f2ed99f559e284a2b.
* Cleanup
Signed-off-by: Nghia Truong
* Revert benchmark
Signed-off-by: Nghia Truong
---------
Signed-off-by: Nghia Truong
---
src/main/cpp/src/get_json_object.cu | 24 +--
src/main/cpp/src/json_parser.cuh | 261 ++++++++++++----------------
2 files changed, 120 insertions(+), 165 deletions(-)
diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu
index f836186192..622a56bc55 100644
--- a/src/main/cpp/src/get_json_object.cu
+++ b/src/main/cpp/src/get_json_object.cu
@@ -304,17 +304,6 @@ __device__ inline thrust::tuple path_match_index(
}
}
-__device__ inline thrust::tuple path_match_named(
- cudf::device_span path)
-{
- auto match = path_match_element(path, path_instruction_type::NAMED);
- if (match) {
- return thrust::make_tuple(true, path.data()[0].name);
- } else {
- return thrust::make_tuple(false, cudf::string_view());
- }
-}
-
__device__ inline thrust::tuple path_match_index_wildcard(
cudf::device_span path)
{
@@ -464,7 +453,7 @@ __device__ thrust::pair evaluate_path(
// case (START_OBJECT, Named :: xs)
// case path 4
else if (json_token::START_OBJECT == ctx.token &&
- thrust::get<0>(path_match_named(ctx.path))) {
+ ctx.path.front().type == path_instruction_type::NAMED) {
if (!ctx.is_first_enter) {
// 2st enter
// skip the following children after the expect
@@ -492,15 +481,16 @@ __device__ thrust::pair evaluate_path(
ctx.is_first_enter = false;
// match first mached children with expected name
bool found_expected_child = false;
- while (json_token::END_OBJECT != p.next_token()) {
+ auto const to_match_name = ctx.path.front().name;
+ while (true) {
+ auto const is_name_matched = p.parse_next_token_with_matching(to_match_name);
+ if (json_token::END_OBJECT == p.get_current_token()) { break; }
+
// JSON validation check
if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }
- // need to try more children
- auto match_named = path_match_named(ctx.path);
- auto named = thrust::get<1>(match_named);
// current token is FIELD_NAME
- if (p.match_current_field_name(named)) {
+ if (is_name_matched) {
// skip FIELD_NAME token
p.next_token();
// JSON validation check
diff --git a/src/main/cpp/src/json_parser.cuh b/src/main/cpp/src/json_parser.cuh
index 4e712937ed..446caf6357 100644
--- a/src/main/cpp/src/json_parser.cuh
+++ b/src/main/cpp/src/json_parser.cuh
@@ -134,13 +134,8 @@ class char_range {
__device__ inline cudf::size_type size() const { return _len; }
__device__ inline char const* data() const { return _data; }
- __device__ inline char const* start() const { return _data; }
- __device__ inline char const* end() const { return _data + _len; }
-
- __device__ inline bool eof(cudf::size_type pos) const { return pos >= _len; }
__device__ inline bool is_null() const { return _data == nullptr; }
- __device__ inline bool is_empty() const { return _len == 0; }
-
+ __device__ inline bool is_empty() const { return _len <= 0; }
__device__ inline char operator[](cudf::size_type pos) const { return _data[pos]; }
__device__ inline cudf::string_view slice_sv(cudf::size_type pos, cudf::size_type len) const
@@ -153,35 +148,29 @@ class char_range {
return char_range(_data + pos, len);
}
- private:
+ protected:
char const* _data;
cudf::size_type _len;
};
/**
- * A char_range that keeps track of where in the data it currently is.
+ * A char range that moves the begin pointer of the current range forward while reading.
+ *
+ * This supports continuous reading of characters without the need for an additional variable
+ * to keep track of the current reading position.
*/
-class char_range_reader {
+class char_range_reader : public char_range {
public:
- __device__ inline explicit char_range_reader(char_range range) : _range(range), _pos(0) {}
-
- __device__ inline char_range_reader(char_range range, cudf::size_type start)
- : _range(range), _pos(start)
+ __device__ inline explicit char_range_reader(char_range range) : char_range(std::move(range)) {}
+ __device__ inline void next()
{
+ _data++;
+ _len--;
}
- __device__ inline bool eof() const { return _range.eof(_pos); }
- __device__ inline bool is_null() const { return _range.is_null(); }
-
- __device__ inline void next() { _pos++; }
-
- __device__ inline char current_char() const { return _range[_pos]; }
-
- __device__ inline cudf::size_type pos() const { return _pos; }
-
- private:
- char_range _range;
- cudf::size_type _pos;
+ // Warning: this does not check for out-of-bounds access.
+ // The caller is responsible for checking that the range is not empty before calling this.
+ __device__ inline char current_char() const { return _data[0]; }
};
/**
@@ -298,7 +287,7 @@ class json_parser {
*/
static __device__ inline bool try_skip(char_range_reader& reader, char expected)
{
- if (!reader.eof() && reader.current_char() == expected) {
+ if (!reader.is_empty() && reader.current_char() == expected) {
reader.next();
return true;
}
@@ -412,12 +401,10 @@ class json_parser {
*/
__device__ inline void parse_string_and_set_current()
{
- // TODO eventually chars should be a reader so we can just pass it in...
- char_range_reader reader(chars, curr_pos);
- auto [success, end_char_pos] = try_parse_string(reader);
+ [[maybe_unused]] auto const [success, matched, end] =
+ try_parse_string(char_range_reader{chars.slice(curr_pos, chars.size() - curr_pos)});
if (success) {
- // TODO remove end_char_pos, and just get it from the reader...
- curr_pos = end_char_pos;
+ curr_pos = static_cast(thrust::distance(chars.data(), end));
current_token = json_token::VALUE_STRING;
} else {
set_current_error();
@@ -499,7 +486,7 @@ class json_parser {
char* copy_destination,
escape_style w_style)
{
- if (str.eof()) { return 0; }
+ if (str.is_empty()) { return 0; }
char const quote_char = str.current_char();
int output_size_bytes = 0;
@@ -514,7 +501,7 @@ class json_parser {
str.next();
// scan string content
- while (!str.eof()) {
+ while (!str.is_empty()) {
char const c = str.current_char();
int const v = static_cast(c);
if (c == quote_char) {
@@ -546,8 +533,10 @@ class json_parser {
} else if ('\\' == c) {
// path 3: escape path
str.next();
- char_range_reader to_match(char_range::null());
- if (!try_skip_escape_part(str, to_match, copy_destination, w_style, output_size_bytes)) {
+ char_range_reader to_match(char_range::null()); // unused
+ bool matched_field_name{false}; // unused
+ if (!try_skip_escape_part(
+ str, to_match, copy_destination, w_style, output_size_bytes, matched_field_name)) {
return output_size_bytes;
}
} else {
@@ -610,85 +599,58 @@ class json_parser {
*
* @param str string to parse
* @param to_match expected match str
- * @param w_style the escape style for writing.
- * @return a pair of success and length, where success is true if the string
- * is valid and length is the number of bytes needed to encode the string
- * in the given style.
+ * @return a tuple of values indicating if the parse process was successful, if field name was
+ * matched, and a pointer to the past-end position of the parsed data
*/
- static __device__ inline std::pair try_parse_string(
- char_range_reader& str,
- char_range_reader to_match = char_range_reader(char_range::null()),
- escape_style w_style = escape_style::UNESCAPED)
+ static __device__ inline thrust::tuple try_parse_string(
+ char_range_reader str, char_range_reader to_match = char_range_reader(char_range::null()))
{
- if (str.eof()) { return std::make_pair(false, 0); }
- char const quote_char = str.current_char();
- int output_size_bytes = 0;
-
- // write the first " if write style is escaped
- if (escape_style::ESCAPED == w_style) { output_size_bytes++; }
+ if (str.is_empty()) { return thrust::make_tuple(false, false, nullptr); }
+ char const quote_char = str.current_char();
+ bool matched_field_name = !to_match.is_null();
// skip left quote char
// We don't need to actually verify what it is, because we just read it.
str.next();
// scan string content
- while (!str.eof()) {
+ while (!str.is_empty()) {
char c = str.current_char();
int v = static_cast(c);
- if (c == quote_char) {
- // path 1: match closing quote char
+ if (c == quote_char) { // path 1: match closing quote char
str.next();
-
- // match check, the last char in match_str is quote_char
- if (!to_match.is_null() && !to_match.eof()) { return std::make_pair(false, 0); }
-
- // write the end " if write style is escaped
- if (escape_style::ESCAPED == w_style) { output_size_bytes++; }
-
- return std::make_pair(true, str.pos());
- } else if (v >= 0 && v < 32) {
- // path 2: unescaped control char
-
- // copy if enabled, escape mode, write more chars
- if (escape_style::ESCAPED == w_style) {
- int escape_chars = escape_char(str.current_char(), nullptr);
- output_size_bytes += (escape_chars - 1);
- }
-
- // check match if enabled
- if (!try_match_char(to_match, str.current_char())) { return std::make_pair(false, 0); }
-
+ matched_field_name = matched_field_name && (to_match.is_null() || to_match.is_empty());
+ return thrust::make_tuple(true, matched_field_name, str.data());
+ } else if (v >= 0 && v < 32) { // path 2: unescaped control char
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
str.next();
- output_size_bytes++;
continue;
- } else if ('\\' == c) {
- // path 3: escape path
+ } else if ('\\' == c) { // path 3: escape path
str.next();
- char* copy_dest_nullptr = nullptr;
- if (!try_skip_escape_part(str, to_match, copy_dest_nullptr, w_style, output_size_bytes)) {
- return std::make_pair(false, 0);
- }
- } else {
- // path 4: safe code point
-
- // handle single unescaped " char; happens when string is quoted by char '
- // e.g.: 'A"' string, escape to "A\\"" (5 chars: " A \ " ")
- if ('\"' == c && escape_style::ESCAPED == w_style) { output_size_bytes++; }
- if (!try_skip_safe_code_point(str, c)) { return std::make_pair(false, 0); }
- // check match if enabled
- if (!try_match_char(to_match, c)) { return std::make_pair(false, 0); }
- output_size_bytes++;
+ char* copy_dest_nullptr = nullptr; // unused
+ int output_size_bytes = 0; // unused
+ if (!try_skip_escape_part(str,
+ to_match,
+ copy_dest_nullptr,
+ escape_style::UNESCAPED,
+ output_size_bytes,
+ matched_field_name)) {
+ return thrust::make_tuple(false, false, nullptr);
+ }
+ } else { // path 4: safe code point
+ if (!try_skip_safe_code_point(str, c)) { return thrust::make_tuple(false, false, nullptr); }
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
}
}
- return std::make_pair(false, 0);
+ return thrust::make_tuple(false, false, nullptr);
}
static __device__ inline bool try_match_char(char_range_reader& reader, char c)
{
if (!reader.is_null()) {
- if (!reader.eof() && reader.current_char() == c) {
+ if (!reader.is_empty() && reader.current_char() == c) {
reader.next();
return true;
} else {
@@ -708,11 +670,12 @@ class json_parser {
char_range_reader& to_match,
char*& copy_dest,
escape_style w_style,
- int& output_size_bytes)
+ int& output_size_bytes,
+ bool& matched_field_name)
{
// already skipped the first '\'
// try skip second part
- if (!str.eof()) {
+ if (!str.is_empty()) {
char const c = str.current_char();
switch (c) {
// path 1: \", \', \\, \/, \b, \f, \n, \r, \t
@@ -725,17 +688,17 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, c)) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
return true;
case '\'':
// for both unescaped/escaped writes a single char '
if (nullptr != copy_dest) { *copy_dest++ = c; }
- if (!try_match_char(to_match, c)) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
return true;
case '\\':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = c; }
@@ -746,16 +709,16 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, c)) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
return true;
case '/':
// for both unescaped/escaped writes a single char /
if (nullptr != copy_dest) { *copy_dest++ = c; }
- if (!try_match_char(to_match, c)) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, c);
return true;
case 'b':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\b'; }
@@ -766,9 +729,9 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, '\b')) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, '\b');
return true;
case 'f':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\f'; }
@@ -779,9 +742,9 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, '\f')) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, '\f');
return true;
case 'n':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\n'; }
@@ -792,9 +755,9 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, '\n')) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, '\n');
return true;
case 'r':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\r'; }
@@ -805,9 +768,9 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, '\r')) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, '\r');
return true;
case 't':
if (nullptr != copy_dest && escape_style::UNESCAPED == w_style) { *copy_dest++ = '\t'; }
@@ -818,9 +781,9 @@ class json_parser {
}
output_size_bytes++;
}
- if (!try_match_char(to_match, '\t')) { return false; }
output_size_bytes++;
str.next();
+ matched_field_name = matched_field_name && try_match_char(to_match, '\t');
return true;
// path 1 done: \", \', \\, \/, \b, \f, \n, \r, \t
case 'u':
@@ -829,7 +792,7 @@ class json_parser {
// for both unescaped/escaped writes corresponding utf8 bytes, no need
// to pass in write style
- return try_skip_unicode(str, to_match, copy_dest, output_size_bytes);
+ return try_skip_unicode(str, to_match, copy_dest, output_size_bytes, matched_field_name);
default:
// path 3: invalid
return false;
@@ -948,13 +911,14 @@ class json_parser {
static __device__ bool try_skip_unicode(char_range_reader& str,
char_range_reader& to_match,
char*& copy_dest,
- int& output_size_bytes)
+ int& output_size_bytes,
+ bool& matched_field_name)
{
// already parsed \u
// now we expect 4 hex chars.
cudf::char_utf8 code_point = 0;
for (size_t i = 0; i < 4; i++) {
- if (str.eof()) { return false; }
+ if (str.is_empty()) { return false; }
char const c = str.current_char();
str.next();
if (!is_hex_digit(c)) { return false; }
@@ -976,9 +940,12 @@ class json_parser {
}
}
- if (!to_match.is_null()) {
+ if (matched_field_name && !to_match.is_null()) {
for (cudf::size_type i = 0; i < bytes; i++) {
- if (to_match.eof() || to_match.current_char() != buff[i]) { return false; }
+ if (to_match.is_empty() || to_match.current_char() != buff[i]) {
+ matched_field_name = false;
+ break;
+ }
to_match.next();
}
}
@@ -1210,16 +1177,17 @@ class json_parser {
/**
* parse the key string in key:value pair
*/
- __device__ inline void parse_field_name_and_set_current()
+ __device__ inline void parse_field_name_and_set_current(
+ bool& matched_field_name, char_range to_match_field_name = char_range::null())
{
- // TODO eventually chars should be a reader so we can just pass it in...
- char_range_reader reader(chars, curr_pos);
- current_token_start_pos = curr_pos;
- auto [success, end_char_pos] = try_parse_string(reader);
+ current_token_start_pos = curr_pos;
+ auto const [success, matched, end] =
+ try_parse_string(char_range_reader{chars.slice(curr_pos, chars.size() - curr_pos)},
+ char_range_reader{std::move(to_match_field_name)});
if (success) {
- // TODO remove end_char_pos, and just get it from the reader...
- curr_pos = end_char_pos;
- current_token = json_token::FIELD_NAME;
+ matched_field_name = matched;
+ curr_pos = static_cast(thrust::distance(chars.data(), end));
+ current_token = json_token::FIELD_NAME;
} else {
set_current_error();
}
@@ -1228,11 +1196,12 @@ class json_parser {
/**
* continute parsing the next token and update current token
* Note: only parse one token at a time
- * @param[out] has_comma_before_token has comma before next token
- * @param[out] has_colon_before_token has colon before next token
*/
- __device__ inline void parse_next_token_and_set_current(bool& has_comma_before_token,
- bool& has_colon_before_token)
+ __device__ inline void parse_next_token_and_set_current(
+ bool& has_comma_before_token,
+ bool& has_colon_before_token,
+ bool& matched_field_name,
+ char_range to_match_field_name = char_range::null())
{
skip_whitespaces();
if (!eof()) {
@@ -1264,7 +1233,7 @@ class json_parser {
current_token = json_token::END_OBJECT;
} else {
// parse key in key:value pair
- parse_field_name_and_set_current();
+ parse_field_name_and_set_current(matched_field_name, to_match_field_name);
}
} else if (current_token == json_token::FIELD_NAME) {
if (c == ':') {
@@ -1289,7 +1258,7 @@ class json_parser {
// parse next key:value pair
curr_pos++;
skip_whitespaces();
- parse_field_name_and_set_current();
+ parse_field_name_and_set_current(matched_field_name, to_match_field_name);
} else {
set_current_error();
}
@@ -1351,10 +1320,29 @@ class json_parser {
// parse next token
bool has_comma_before_token; // no-initialization because of do not care here
bool has_colon_before_token; // no-initialization because of do not care here
- parse_next_token_and_set_current(has_comma_before_token, has_colon_before_token);
+ bool matched_field_name; // no-initialization because of do not care here
+ parse_next_token_and_set_current(
+ has_comma_before_token, has_colon_before_token, matched_field_name);
return current_token;
}
+ /**
+ * Continue parsing the next token; if it is a field name, check it against `to_match_field_name`.
+ * @return the match flag reported by the parser (false if the parser never sets it).
+ */
+ __device__ bool parse_next_token_with_matching(cudf::string_view to_match_field_name)
+ {
+ // parse next token
+ bool has_comma_before_token; // no-initialization because of do not care here
+ bool has_colon_before_token; // no-initialization because of do not care here
+ bool matched_field_name = false; // initialized: the callee leaves it unset unless a field name parses
+ parse_next_token_and_set_current(has_comma_before_token,
+ has_colon_before_token,
+ matched_field_name,
+ char_range{to_match_field_name});
+ return matched_field_name;
+ }
+
/**
* get current token
*/
@@ -1573,31 +1561,6 @@ class json_parser {
return 0;
}
- /**
- * match field name string when current token is FIELD_NAME,
- * return true if current token is FIELD_NAME and match successfully.
- * return false otherwise,
- */
- __device__ bool match_current_field_name(cudf::string_view name) const
- {
- return match_current_field_name(char_range(name));
- }
-
- /**
- * match current field name
- */
- __device__ bool match_current_field_name(char_range name) const
- {
- if (json_token::FIELD_NAME == current_token) {
- char_range_reader reader(current_range());
- char_range_reader to_match(name);
- auto [b, end_pos] = try_parse_string(reader, to_match, escape_style::UNESCAPED);
- return b;
- } else {
- return false;
- }
- }
-
/**
* copy current structure to destination.
* return false if meets JSON format error,
@@ -1648,7 +1611,9 @@ class json_parser {
bool has_colon_before_token = false;
// parse and get has_comma_before_token, has_colon_before_token
- parse_next_token_and_set_current(has_comma_before_token, has_colon_before_token);
+ bool matched_field_name; // unused
+ parse_next_token_and_set_current(
+ has_comma_before_token, has_colon_before_token, matched_field_name);
// check the JSON format
if (current_token == json_token::ERROR) { return thrust::make_pair(false, 0); }
From 9356867d10dcacdd88005c79e10c44040e239afd Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Fri, 11 Oct 2024 16:26:54 -0500
Subject: [PATCH 050/157] Nvcomp revert followup (#2497)
Signed-off-by: Robert (Bobby) Evans
---
ci/submodule-sync.sh | 2 +-
patches/revert_nvcomp4.patch | 82 +++++++++++++++++++++---------
pom.xml | 8 ++-
thirdparty/cudf-pins/versions.json | 8 +--
4 files changed, 68 insertions(+), 32 deletions(-)
diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
index 29b0cf5dad..bd9d8d87bb 100755
--- a/ci/submodule-sync.sh
+++ b/ci/submodule-sync.sh
@@ -71,7 +71,7 @@ echo "Test against ${cudf_sha}..."
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
set +e
# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
-${MVN} validate ${MVN_MIRROR} \
+${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
diff --git a/patches/revert_nvcomp4.patch b/patches/revert_nvcomp4.patch
index 88b58b14dc..914c033088 100644
--- a/patches/revert_nvcomp4.patch
+++ b/patches/revert_nvcomp4.patch
@@ -25,7 +25,7 @@ index 5e9f7f8a0c..0e4745bda2 100755
${package_dir}/dist/*
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
-index 5a05dfd053..e7363645d6 100644
+index bd5e6c3d56..74ca3fda1a 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -58,7 +58,7 @@ dependencies:
@@ -38,11 +38,11 @@ index 5a05dfd053..e7363645d6 100644
- openpyxl
- packaging
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
-index 8490296233..3559a1a341 100644
+index 565a3ebfa3..22619acf4a 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -56,7 +56,7 @@ dependencies:
- - numba>=0.57
+ - numba-cuda>=0.0.13
- numpy>=1.23,<3.0a0
- numpydoc
-- nvcomp==4.0.1
@@ -50,6 +50,15 @@ index 8490296233..3559a1a341 100644
- nvtx>=0.2.1
- openpyxl
- packaging
+@@ -67,7 +67,7 @@ dependencies:
+ - pre-commit
+ - pyarrow>=14.0.0,<18.0.0a0
+ - pydata-sphinx-theme!=0.14.2
+-- pynvjitlink>=0.0.0a0
++- pynvjitlink
+ - pytest-benchmark
+ - pytest-cases>=3.8.2
+ - pytest-cov
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index dc75eb4b25..67d501d746 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
@@ -697,7 +706,7 @@ index 60a64fb0ee..40cfbe763b 100644
/**
diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu
-index c588fedb85..bab70c126b 100644
+index 27312a4da8..779d40281b 100644
--- a/cpp/src/io/parquet/reader_impl_chunking.cu
+++ b/cpp/src/io/parquet/reader_impl_chunking.cu
@@ -865,18 +865,8 @@ std::vector compute_page_splits_by_row(device_span=2.5.0,<2.6.0a0
+@@ -381,21 +381,21 @@ dependencies:
+ - output_types: conda
+ packages:
# Align nvcomp version with rapids-cmake
- - nvcomp==4.0.1
+ - nvcomp==3.0.6
- - spdlog>=1.14.1,<1.15
+ specific:
+ - output_types: [requirements, pyproject]
+ matrices:
+ - matrix:
+ cuda: "12.*"
+ packages:
+- - nvidia-nvcomp-cu12==4.0.1
++ - nvidia-nvcomp-cu12==3.0.6
+ - matrix:
+ cuda: "11.*"
+ packages:
+- - nvidia-nvcomp-cu11==4.0.1
++ - nvidia-nvcomp-cu11==3.0.6
+ - matrix:
+ packages:
+- - nvidia-nvcomp==4.0.1
++ - nvidia-nvcomp==3.0.6
rapids_build_skbuild:
common:
+ - output_types: [conda, requirements, pyproject]
+@@ -665,7 +665,7 @@ dependencies:
+ matrices:
+ - matrix: {cuda: "12.*"}
+ packages:
+- - &pynvjitlink_unsuffixed pynvjitlink>=0.0.0a0
++ - &pynvjitlink_unsuffixed pynvjitlink
+ - matrix: {cuda: "11.*"}
+ packages:
+ - &cubinlinker_unsuffixed cubinlinker
+@@ -676,7 +676,7 @@ dependencies:
+ cuda: "12.*"
+ cuda_suffixed: "true"
+ packages:
+- - pynvjitlink-cu12>=0.0.0a0
++ - pynvjitlink-cu12
+ - matrix:
+ cuda: "12.*"
+ cuda_suffixed: "false"
diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md
index 97b961b455..adcdaa51e7 100644
--- a/docs/cudf/source/user_guide/io/io.md
@@ -843,7 +887,7 @@ index 97b961b455..adcdaa51e7 100644
-
```
diff --git a/java/pom.xml b/java/pom.xml
-index e4f1cdf64e..9694e741f1 100644
+index 450cfbdbc8..55cb055398 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -1,6 +1,6 @@
@@ -893,15 +937,3 @@ index 32045f3c50..c18a90140b 100644
COMMENT "Copying nvcomp libraries to ${PROJECT_BINARY_DIR}"
)
endif()
-diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt
-index 0a8f5c4807..96eb6c3bb3 100644
---- a/python/libcudf/CMakeLists.txt
-+++ b/python/libcudf/CMakeLists.txt
-@@ -48,5 +48,6 @@ add_subdirectory(../../cpp cudf-cpp)
- # Ensure other libraries needed by libcudf.so get installed alongside it.
- include(cmake/Modules/WheelHelpers.cmake)
- install_aliased_imported_targets(
-- TARGETS cudf nvcomp::nvcomp DESTINATION ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
-+ TARGETS cudf nvcomp::nvcomp nvcomp::nvcomp_gdeflate nvcomp::nvcomp_bitcomp DESTINATION
-+ ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
- )
diff --git a/pom.xml b/pom.xml
index b9c6877688..c3156fdb57 100644
--- a/pom.xml
+++ b/pom.xml
@@ -429,7 +429,7 @@
build-libcudf
validate
-
+
@@ -466,7 +466,8 @@
+ executable="cmake"
+ unless:true="${submodule.patch.skip}">
@@ -483,6 +484,7 @@
build-libcudfjni
validate
+ ${submodule.patch.skip}
build-sparkrapidsjni
validate
+ ${submodule.patch.skip}
build-info
generate-resources
+ ${submodule.patch.skip}
Date: Sat, 12 Oct 2024 10:52:51 +0800
Subject: [PATCH 051/157] Update submodule cudf to
be1dd3267ed3cf7045c573ccc622f34fd159675f (#2500)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 1436cac9de..be1dd3267e 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 1436cac9de8b450a32e71d5b779503e9a29edaa6
+Subproject commit be1dd3267ed3cf7045c573ccc622f34fd159675f
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 72c0ecec7f..df743054ff 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1ef4094331be58ce881e534d669da706bdb979ed",
+ "git_tag" : "22668fa1d9ea5918f463c52bcdcb5ef181e5d1d0",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "90a5631e1093ce44c4feceb88fcf557c3dfc043b",
+ "git_tag" : "1b70ffdd5ab460ac481f1575c42e8c1fccfda792",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
@@ -162,4 +162,4 @@
"version" : "1.14.1"
}
}
-}
+}
\ No newline at end of file
From 282e0d0a7cce8d1c24ad6e44a5d22d6bb1733599 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 12 Oct 2024 16:02:08 +0800
Subject: [PATCH 052/157] Update submodule cudf to
4dbb8a354a9d4f0b4d82a5bf9747409c6304358f (#2501)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index be1dd3267e..4dbb8a354a 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit be1dd3267ed3cf7045c573ccc622f34fd159675f
+Subproject commit 4dbb8a354a9d4f0b4d82a5bf9747409c6304358f
From 2c3b60cc8c3411350bfa2639d4f752fa9c4e4f96 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Mon, 14 Oct 2024 17:29:29 -0500
Subject: [PATCH 053/157] Make it so applying and removing patches are
repeatable without errors (#2502)
* Make it so applying and removing patches are repeatable without errors
Signed-off-by: Robert (Bobby) Evans
* Adjust config for skipping a patch
* More fixes
Signed-off-by: Robert (Bobby) Evans
---------
Signed-off-by: Robert (Bobby) Evans
---
build/apply-patches | 49 +++++++++++++++++++++++++++-----
build/unapply-patches | 66 ++++++++++++++++++++++++++++++++++---------
ci/submodule-sync.sh | 5 ++--
pom.xml | 12 ++++----
4 files changed, 103 insertions(+), 29 deletions(-)
diff --git a/build/apply-patches b/build/apply-patches
index 991613e6dc..31c2adcfdd 100755
--- a/build/apply-patches
+++ b/build/apply-patches
@@ -16,8 +16,6 @@
# limitations under the License.
#
-# Run a command in a Docker container with devtoolset
-
set -e
BASE_DIR=$( git rev-parse --show-toplevel )
@@ -26,14 +24,51 @@ PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
+# Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
+# making sure that a user can do development work in spark-rapids-jni without the patches
+# getting in the way
+# The operations I really want to support no matter what state CUDF is in are
+# 1) Build the repo from scratch
+# 2) Rebuild the repo without having to clean and start over
+# 3) upmerge to a new version of the plugin including updating the cudf submodule
+#
+# Building from scratch is simple. We want clean to unapply any patches and
+# build to apply them. But if we want to rebuild without a clean we need to know what
+# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
+# is to save some state files about what happened. But a user could mess with CUDF directly
+# so we want to have ways to double check that they are indeed correct.
+
+FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
+
pushd "$CUDF_DIR"
-if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
- echo "Error: CUDF repository has uncommitted changes. No patches will be applied..."
- exit 1
+
+PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
+
+if [ -z "$PATCH_FILES" ] ; then
+ echo "No patches to apply"
+ exit 0
+fi
+
+CHANGED_FILES=$(git status --porcelain --untracked-files=no)
+
+if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
+ if git apply -R --check "$FULLY_PATCHED_FILE" ; then
+ echo "Patches appear to have been applied already"
+ exit 0
+ fi
+fi
+
+if [ -n "$CHANGED_FILES" ] ; then
+ echo "Error: CUDF repository has uncommitted changes. No patches will be applied. Please clean the repository so we can try and add the needed patches"
+ echo "$CHANGED_FILES" # list the modified files captured from git status above
+ exit 1
fi
find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV | while IFS= read -r -d '' file; do
- echo "patching with: $file"
- patch --no-backup-if-mismatch -f -t --reject-file=- -p1 -i "$file"
+ echo "patching with: $file"
+ git apply -v "$file"
done
+
+git diff > "$FULLY_PATCHED_FILE"
+
popd
diff --git a/build/unapply-patches b/build/unapply-patches
index 186a781ade..a31708e25f 100755
--- a/build/unapply-patches
+++ b/build/unapply-patches
@@ -16,29 +16,67 @@
# limitations under the License.
#
-# Run a command in a Docker container with devtoolset
-
set -e
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+BASE_DIR=$( git rev-parse --show-toplevel )
+
+PATCH_DIR=${PATCH_DIR:-$(realpath "$BASE_DIR/patches/")}
-PATCH_DIR=${PATCH_DIR:-$(realpath "$SCRIPT_DIR/../patches/")}
+CUDF_DIR=${CUDF_DIR:-$(realpath "$BASE_DIR/thirdparty/cudf/")}
-CUDF_DIR=${CUDF_DIR:-$(realpath "$SCRIPT_DIR/../thirdparty/cudf/")}
+# Applying patches to CUDF is problematic in a number of ways. But ultimately it comes down to
+# making sure that a user can do development work in spark-rapids-jni without the patches
+# getting in the way
+# The operations I really want to support no matter what state CUDF is in are
+# 1) Build the repo from scratch
+# 2) Rebuild the repo without having to clean and start over
+# 3) upmerge to a new version of the plugin including updating the cudf submodule
+#
+# Building from scratch is simple. We want clean to unapply any patches and
+# build to apply them. But if we want to rebuild without a clean we need to know what
+# state the CUDF repo is in. Did we apply patches to it or not. The fastest way to do this
+# is to save some state files about what happened. But a user could mess with CUDF directly
+# so we want to have ways to double check that they are indeed correct.
+FULLY_PATCHED_FILE="$CUDF_DIR/spark-rapids-jni.patch"
pushd "$CUDF_DIR"
-if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
- #only try to remove patches if it looks like something was changed
- find "$PATCH_DIR" -maxdepth 1 -type f -print0 | sort -zV -r | while IFS= read -r -d '' file; do
- echo "patching with: $file"
- patch -R --no-backup-if-mismatch --reject-file=- -f -t -p1 -i "$file"
- done
+
+PATCH_FILES=$(find "$PATCH_DIR" -type f -not -empty)
+
+if [ -z "$PATCH_FILES" ] ; then
+ echo "No patches to remove"
+ exit 0
fi
-# Check for modifications
-if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
- echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
+CHANGED_FILES=$(git status --porcelain --untracked-files=no)
+
+if [ \( -s "$FULLY_PATCHED_FILE" \) -a \( -n "$CHANGED_FILES" \) ] ; then
+ if git apply --check -R "$FULLY_PATCHED_FILE"; then
+ echo "Patches appear to have been applied, so going to remove them"
+ git apply -R -v "$FULLY_PATCHED_FILE"
+ rm -f "$FULLY_PATCHED_FILE"
+
+ # Check for modifications, again
+ if [ -n "$(git status --porcelain --untracked-files=no)" ] ; then
+ echo "Error: CUDF repository has uncommitted changes. You might want to clean in manually if you know that is expected"
+ git status --porcelain --untracked-files=no
+ exit 1
+ fi
+
+ exit 0
+ else
+ echo "Files are changed, but in a way where the full patch file does not apply to remove them $FULLY_PATCHED_FILE"
exit 1
+ fi
fi
+
+if [ -n "$CHANGED_FILES" ] ; then
+ echo "Error: CUDF repository has uncommitted changes, but does not appear to have been patched. Please clean it and try again."
+ echo "$CHANGED_FILES" # list the modified files captured from git status above
+ exit 1
+else
+ echo "No changes in CUDF repository to remove"
+fi
+
popd
diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
index bd9d8d87bb..25cc6b9901 100755
--- a/ci/submodule-sync.sh
+++ b/ci/submodule-sync.sh
@@ -71,12 +71,13 @@ echo "Test against ${cudf_sha}..."
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
set +e
# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
+# calling the antrun directly skips applying patches and also only builds
+# libcudf
${MVN} antrun:run@build-libcudf ${MVN_MIRROR} \
-DCPP_PARALLEL_LEVEL=${PARALLEL_LEVEL} \
-Dlibcudf.build.configure=true \
-Dlibcudf.dependency.mode=latest \
- -Dsubmodule.patch.skip \
- -DUSE_GDS=ON -Dtest=*,!CuFileTest,!CudaFatalTest,!ColumnViewNonEmptyNullsTest \
+ -DUSE_GDS=ON \
-DBUILD_TESTS=ON \
-DUSE_SANITIZER=ON
validate_status=$?
diff --git a/pom.xml b/pom.xml
index c3156fdb57..a50feefb22 100644
--- a/pom.xml
+++ b/pom.xml
@@ -110,6 +110,10 @@
UTF-8
1.7.30
false
+
false
3.0.0
0.2.2
@@ -429,7 +433,7 @@
build-libcudf
validate
-
+
@@ -466,8 +470,7 @@
+ executable="cmake">
@@ -484,7 +487,6 @@
build-libcudfjni
validate
- ${submodule.patch.skip}
build-sparkrapidsjni
validate
- ${submodule.patch.skip}
build-info
generate-resources
- ${submodule.patch.skip}
Date: Tue, 15 Oct 2024 11:27:36 -0500
Subject: [PATCH 054/157] Update to latest cudf 24.12 and add cudftestutil_impl
dependency to tests (#2505)
Signed-off-by: Jason Lowe
---
src/main/cpp/tests/CMakeLists.txt | 3 ++-
thirdparty/cudf | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/src/main/cpp/tests/CMakeLists.txt b/src/main/cpp/tests/CMakeLists.txt
index 244d18c903..c774d30618 100644
--- a/src/main/cpp/tests/CMakeLists.txt
+++ b/src/main/cpp/tests/CMakeLists.txt
@@ -31,7 +31,8 @@ function(ConfigureTest CMAKE_TEST_NAME)
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(${CMAKE_TEST_NAME} GTest::gtest_main GTest::gmock_main cudf::cudf
- cudf::cudftestutil spark_rapids_jni)
+ cudf::cudftestutil cudf::cudftestutil_impl
+ spark_rapids_jni)
add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME})
install(
TARGETS ${CMAKE_TEST_NAME}
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 4dbb8a354a..319ec3b803 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 4dbb8a354a9d4f0b4d82a5bf9747409c6304358f
+Subproject commit 319ec3b8031e4deb7dfc3f4c4a07a10ef88c131f
From 41945c62f023a5cf232463d8eeeb33d6951781a7 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 16 Oct 2024 02:27:06 +0800
Subject: [PATCH 055/157] Update submodule cudf to
7bcfc87935b7a202002d54e17e140789b02f16e9 (#2507)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 319ec3b803..7bcfc87935 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 319ec3b8031e4deb7dfc3f4c4a07a10ef88c131f
+Subproject commit 7bcfc87935b7a202002d54e17e140789b02f16e9
From e118e6eca1f6f67d4699a7eae6cb7d5481d87a20 Mon Sep 17 00:00:00 2001
From: Peixin
Date: Wed, 16 Oct 2024 08:50:30 +0800
Subject: [PATCH 056/157] Make submodule-sync always try update cudf-pins
(#2504)
Signed-off-by: Peixin Li
---
ci/submodule-sync.sh | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
index 25cc6b9901..ea7c06b7ec 100755
--- a/ci/submodule-sync.sh
+++ b/ci/submodule-sync.sh
@@ -57,17 +57,19 @@ if [ -n "$CUDF_TAG" ]; then
else
git submodule update --remote --merge
fi
+
+cudf_pins_only=false
cudf_sha=$(git -C thirdparty/cudf rev-parse HEAD)
if [[ "${cudf_sha}" == "${cudf_prev_sha}" ]]; then
- echo "Submodule is up to date."
- exit 0
+ echo "cuDF submodule is up to date. Try update cudf-pins..."
+ cudf_pins_only=true
+else
+ echo "Try update cudf submodule to ${cudf_sha}..."
+ git add .
+ git commit -s -m "Update submodule cudf to ${cudf_sha}"
fi
-echo "Try update cudf submodule to ${cudf_sha}..."
-git add .
-
-echo "Test against ${cudf_sha}..."
-
+echo "Build libcudf only to update pinned versions..."
MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -B"
set +e
# Don't do a full build. Just try to update/build CUDF with no patches on top of it.
@@ -101,9 +103,17 @@ sed -i -e 's/4\.0\.1\.0/3.0.6/' \
# the updated versions.json generated by the build
echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
git add .
-git diff-index --quiet HEAD || git commit -s -m "Update submodule cudf to ${cudf_sha}"
+if ! git diff-index --quiet HEAD; then
+ # We perform a squash merge for submodule-sync commits
+ git commit -s -m "Update pinned versions for cudf ${cudf_sha}"
+elif ${cudf_pins_only}; then
+ echo "No changes to commit. Exit early..."
+ exit 0
+fi
+
sha=$(git rev-parse HEAD)
+echo "Test against ${cudf_sha}..."
set +e
# now build and test everything with the patches in place
${MVN} clean verify ${MVN_MIRROR} \
From 33a92f75a550b715d7d4cd7ede799264fc2c4dbe Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 16 Oct 2024 16:48:57 +0800
Subject: [PATCH 057/157] Update submodule cudf to
3420c71cb72f63db8d63164446cca042f354a08e (#2508)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 7bcfc87935..3420c71cb7 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 7bcfc87935b7a202002d54e17e140789b02f16e9
+Subproject commit 3420c71cb72f63db8d63164446cca042f354a08e
From fd67ca0ab02c3fb22c078d22b332d872523b94f0 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 16 Oct 2024 22:17:28 +0800
Subject: [PATCH 058/157] Update pinned versions for cudf
3420c71cb72f63db8d63164446cca042f354a08e (#2509)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf-pins/versions.json | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index df743054ff..7be36774f6 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "22668fa1d9ea5918f463c52bcdcb5ef181e5d1d0",
+ "git_tag" : "a34d6bf039b945cfe4e65993373b28e153abbaa7",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1b70ffdd5ab460ac481f1575c42e8c1fccfda792",
+ "git_tag" : "de42f5711386f6b914cef0fc54d3081a936c5740",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
From e53547bb6b4e4b02a4f28c64f4958a56fa760bf1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 16 Oct 2024 11:14:26 -0500
Subject: [PATCH 059/157] Bump org.apache.hadoop:hadoop-common from 3.2.4 to
3.4.0 (#2432)
Bumps org.apache.hadoop:hadoop-common from 3.2.4 to 3.4.0.
---
updated-dependencies:
- dependency-name: org.apache.hadoop:hadoop-common
dependency-type: direct:development
...
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index a50feefb22..641bb25e90 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,7 +94,7 @@
${cuda.version}
${project.basedir}/thirdparty/cudf
${project.basedir}/thirdparty/cudf-pins/
- 3.2.4
+ 3.4.0
5.8.1
${project.build.directory}/libcudf/cmake-build/
false
From 252edb89e6f63e00bbce6c17e5e189d396ae4cea Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 17 Oct 2024 12:19:35 +0800
Subject: [PATCH 060/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2511)
* Update submodule cudf to c9202a0797c1b23f02edbdef34d292ebfd74117f
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf c9202a0797c1b23f02edbdef34d292ebfd74117f
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 3420c71cb7..c9202a0797 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 3420c71cb72f63db8d63164446cca042f354a08e
+Subproject commit c9202a0797c1b23f02edbdef34d292ebfd74117f
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index 1073147e4f..f098825f05 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-27b7b6686d2ffd7f4d4372700fd54f33bcaf67ae
+61bcb7d39c5aad77100ab5733cbdddf1651dbe11
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 7be36774f6..084117b584 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "de42f5711386f6b914cef0fc54d3081a936c5740",
+ "git_tag" : "50e60a868af05cc9f65b9980753d708e7170f3a1",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
From 24fafddd0b8aa2590bc735e64f19f9eea8fa9fae Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 17 Oct 2024 16:51:07 +0800
Subject: [PATCH 061/157] Update submodule cudf to
3683e4685ff0f0bc8122fe654742f708bf9fdbcc (#2512)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index c9202a0797..3683e4685f 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit c9202a0797c1b23f02edbdef34d292ebfd74117f
+Subproject commit 3683e4685ff0f0bc8122fe654742f708bf9fdbcc
From 6d2c0928156d6d76db3ed57856014de37c434dd4 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 18 Oct 2024 02:55:23 +0800
Subject: [PATCH 062/157] Update submodule cudf to
14209c1962f1615f82f2c5be1cdbf58a6ed05789 (#2513)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 3683e4685f..14209c1962 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 3683e4685ff0f0bc8122fe654742f708bf9fdbcc
+Subproject commit 14209c1962f1615f82f2c5be1cdbf58a6ed05789
From 1beb0c80c3bcf4b6cf187cd2bcb1984939ad53a0 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 18 Oct 2024 07:03:31 +0800
Subject: [PATCH 063/157] Update submodule cudf to
00feb82cbda10bf65343e08d54ed9e893ff4aa71 (#2514)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 14209c1962..00feb82cbd 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 14209c1962f1615f82f2c5be1cdbf58a6ed05789
+Subproject commit 00feb82cbda10bf65343e08d54ed9e893ff4aa71
From 8a672b6de55566b7b6557b8dc29b73ee21a0b17c Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 18 Oct 2024 10:55:12 +0800
Subject: [PATCH 064/157] Update submodule cudf to
ce93c366c451e27a49583cbb809bf5579a4bcf15 (#2515)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 00feb82cbd..ce93c366c4 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 00feb82cbda10bf65343e08d54ed9e893ff4aa71
+Subproject commit ce93c366c451e27a49583cbb809bf5579a4bcf15
From 797101fa2a365c706fc96fd3339cab5b5b4c3257 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 18 Oct 2024 16:50:36 +0800
Subject: [PATCH 065/157] Update submodule cudf to
b8917229f8a2446c7e5f697475f76743a05e6856 (#2516)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index ce93c366c4..b8917229f8 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit ce93c366c451e27a49583cbb809bf5579a4bcf15
+Subproject commit b8917229f8a2446c7e5f697475f76743a05e6856
From 4765d5c1cbcda4ab132e9331b4746696ece7daae Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:16:22 -0700
Subject: [PATCH 066/157] Use `cudf::make_strings_column_batch` in
`get_json_object` (#2499)
* Testing
Signed-off-by: Nghia Truong
* Use `make_strings_column_batch`
Signed-off-by: Nghia Truong
---------
Signed-off-by: Nghia Truong
---
src/main/cpp/src/get_json_object.cu | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu
index 622a56bc55..8fce46bca4 100644
--- a/src/main/cpp/src/get_json_object.cu
+++ b/src/main/cpp/src/get_json_object.cu
@@ -1019,7 +1019,6 @@ std::vector> get_json_object_batch(
construct_path_commands(json_paths, stream);
auto const num_outputs = json_paths.size();
- std::vector> output;
// The error check array contains markers denoting if there is any out-of-bound write occurs
// (first `num_outputs` elements), or if the nesting depth exceeded its limits (the last element).
@@ -1052,19 +1051,23 @@ std::vector> get_json_object_batch(
auto d_path_data = cudf::detail::make_device_uvector_async(
h_path_data, stream, rmm::mr::get_current_device_resource());
thrust::uninitialized_fill(
- rmm::exec_policy(stream), d_error_check.begin(), d_error_check.end(), 0);
+ rmm::exec_policy_nosync(stream), d_error_check.begin(), d_error_check.end(), 0);
kernel_launcher::exec(input, d_path_data, d_max_path_depth_exceeded, stream);
auto h_error_check = cudf::detail::make_host_vector_sync(d_error_check, stream);
auto has_no_oob = check_error(h_error_check);
+ std::vector const>>
+ batch_stringviews;
+ batch_stringviews.reserve(out_stringviews.size());
+
// If we didn't see any out-of-bound write, everything is good so far.
// Just gather the output strings and return.
if (has_no_oob) {
for (auto const& out_sview : out_stringviews) {
- output.emplace_back(cudf::make_strings_column(out_sview, stream, mr));
+ batch_stringviews.emplace_back(out_sview);
}
- return output;
+ return cudf::make_strings_column_batch(batch_stringviews, stream, mr);
}
// From here, we had out-of-bound write. Although this is very rare, it may still happen.
@@ -1072,6 +1075,7 @@ std::vector> get_json_object_batch(
std::vector, int64_t>> out_offsets_and_sizes;
std::vector> out_char_buffers;
std::vector oob_indices;
+ std::vector no_oob_indices;
// Check validity from the stored char pointers.
auto const validator = [] __device__(thrust::pair const item) {
@@ -1085,7 +1089,6 @@ std::vector> get_json_object_batch(
if (h_error_check[idx]) {
oob_indices.emplace_back(idx);
- output.emplace_back(nullptr); // just placeholder.
out_null_masks_and_null_counts.emplace_back(
cudf::detail::valid_if(out_sview.begin(), out_sview.end(), validator, stream, mr));
@@ -1111,9 +1114,18 @@ std::vector> get_json_object_batch(
out_char_buffers.back().data(),
d_error_check.data() + idx});
} else {
- output.emplace_back(cudf::make_strings_column(out_sview, stream, mr));
+ no_oob_indices.emplace_back(idx);
+ batch_stringviews.emplace_back(out_sview);
}
}
+
+ std::vector> output(num_outputs);
+ auto no_oob_output = cudf::make_strings_column_batch(batch_stringviews, stream, mr);
+ for (std::size_t idx = 0; idx < no_oob_indices.size(); ++idx) {
+ auto const out_idx = no_oob_indices[idx];
+ output[out_idx] = std::move(no_oob_output[idx]);
+ }
+
// These buffers are no longer needed.
scratch_buffers.clear();
out_stringviews.clear();
@@ -1122,7 +1134,7 @@ std::vector> get_json_object_batch(
d_path_data = cudf::detail::make_device_uvector_async(
h_path_data, stream, rmm::mr::get_current_device_resource());
thrust::uninitialized_fill(
- rmm::exec_policy(stream), d_error_check.begin(), d_error_check.end(), 0);
+ rmm::exec_policy_nosync(stream), d_error_check.begin(), d_error_check.end(), 0);
kernel_launcher::exec(input, d_path_data, d_max_path_depth_exceeded, stream);
h_error_check = cudf::detail::make_host_vector_sync(d_error_check, stream);
has_no_oob = check_error(h_error_check);
From 340e27198110bed23d05dcddfde5b488bce10662 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 19 Oct 2024 07:00:49 +0800
Subject: [PATCH 067/157] Update submodule cudf to
6ad90742f5a1efa5eecbbad25dddc46c1ed5c801 (#2517)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index b8917229f8..6ad90742f5 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit b8917229f8a2446c7e5f697475f76743a05e6856
+Subproject commit 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801
From 8913882710abaf7e1df507157abfcf7e19c20ea0 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 19 Oct 2024 10:45:05 +0800
Subject: [PATCH 068/157] Update submodule cudf to
98eef67d12670bd592022201b3c9dcc12374a34a (#2518)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 6ad90742f5..98eef67d12 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 6ad90742f5a1efa5eecbbad25dddc46c1ed5c801
+Subproject commit 98eef67d12670bd592022201b3c9dcc12374a34a
From 3aa3421a3202523c2f0d742fa5fb3977b6ffe387 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 19 Oct 2024 16:45:32 +0800
Subject: [PATCH 069/157] Update submodule cudf to
fdd2b262aa76400d3d57018461eba37892445a4b (#2519)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 98eef67d12..fdd2b262aa 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 98eef67d12670bd592022201b3c9dcc12374a34a
+Subproject commit fdd2b262aa76400d3d57018461eba37892445a4b
From 5a7c5ce0bb2869a445c044a431f8b0e66c6db9a3 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sun, 20 Oct 2024 06:46:41 +0800
Subject: [PATCH 070/157] Update submodule cudf to
1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2 (#2520)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index fdd2b262aa..1ce2526bde 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit fdd2b262aa76400d3d57018461eba37892445a4b
+Subproject commit 1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2
From d0a55aa3f43d43537eaff50b655e7a7e20360a7d Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sun, 20 Oct 2024 12:22:06 +0800
Subject: [PATCH 071/157] Update submodule cudf to
074ab749531aa136c546afc7837fec0b404fe022 (#2521)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 1ce2526bde..074ab74953 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 1ce2526bde7f77d2da7d0927a052fd9ccf69b9f2
+Subproject commit 074ab749531aa136c546afc7837fec0b404fe022
From ae6b48c6aa9e774fbb9130964d86a71edbc74ea6 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 22 Oct 2024 08:24:12 +0800
Subject: [PATCH 072/157] Update pinned versions for cudf
074ab749531aa136c546afc7837fec0b404fe022 (#2523)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf-pins/versions.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 084117b584..dff6415001 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "a34d6bf039b945cfe4e65993373b28e153abbaa7",
+ "git_tag" : "f2a056710e6b614cf7dfec17c2e860acd2eddbcc",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From 5fe13b1013244659f8f0592994d5014469b5d3ed Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 22 Oct 2024 10:55:57 +0800
Subject: [PATCH 073/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2524)
* Update submodule cudf to 69ca3874b97e9cce6efb71e3e33ec598b57908a3
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf 69ca3874b97e9cce6efb71e3e33ec598b57908a3
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 074ab74953..69ca3874b9 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 074ab749531aa136c546afc7837fec0b404fe022
+Subproject commit 69ca3874b97e9cce6efb71e3e33ec598b57908a3
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index dff6415001..26a1fe58de 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "f2a056710e6b614cf7dfec17c2e860acd2eddbcc",
+ "git_tag" : "36c5c270990a2fe55f974e7d77bd7b24681629ba",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From a11322db27b585439a178397bad1530ce0c691d0 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 22 Oct 2024 20:52:24 +0800
Subject: [PATCH 074/157] Update submodule cudf to
637e3206a4656bd38636f3fadf3c4573c7bc906a (#2525)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 69ca3874b9..637e3206a4 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 69ca3874b97e9cce6efb71e3e33ec598b57908a3
+Subproject commit 637e3206a4656bd38636f3fadf3c4573c7bc906a
From 75155c498c3b40dd2fd0d42bcb467ccda9573f00 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 23 Oct 2024 00:52:10 +0800
Subject: [PATCH 075/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2526)
* Update submodule cudf to 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 637e3206a4..4fe338c0ef 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 637e3206a4656bd38636f3fadf3c4573c7bc906a
+Subproject commit 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 26a1fe58de..2841a245b9 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "50e60a868af05cc9f65b9980753d708e7170f3a1",
+ "git_tag" : "1024a1250cfde7e93d26dc6d5e063e84c4a39824",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
From dffb829212ce4118aecb979ae3c8111274e26c1b Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 23 Oct 2024 06:52:25 +0800
Subject: [PATCH 076/157] Update pinned versions for cudf
4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2 (#2527)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index f098825f05..da52b90f5a 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-61bcb7d39c5aad77100ab5733cbdddf1651dbe11
+ab9f5097b8cef743a6a5d1df1b75863054b47464
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 2841a245b9..4315fe9acf 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "36c5c270990a2fe55f974e7d77bd7b24681629ba",
+ "git_tag" : "fcf4b155314184e7f9ce1fa5209ca755a80a4867",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From 9c4061a0830be5377267d3bd3fd50d57e2a69c30 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:05:25 +0800
Subject: [PATCH 077/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2528)
* Update submodule cudf to cff1296845aa9a4078dd0d95dd30b7e7c004f2d9
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf cff1296845aa9a4078dd0d95dd30b7e7c004f2d9
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
thirdparty/cudf-pins/versions.json | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 4fe338c0ef..cff1296845 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 4fe338c0efe0fee2ee69c8207f9f4cbe9aa4d4a2
+Subproject commit cff1296845aa9a4078dd0d95dd30b7e7c004f2d9
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index da52b90f5a..9aba80689c 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-ab9f5097b8cef743a6a5d1df1b75863054b47464
+e5897d8093393e263ad43d4ecbffe48b6a07ecbb
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 4315fe9acf..9fea155e9c 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "fcf4b155314184e7f9ce1fa5209ca755a80a4867",
+ "git_tag" : "52b672b6405f6312108263c289c3b042eb0bd50b",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
@@ -60,7 +60,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "71e8f81ebb61d17dcbe8df892d208f6401514bf6",
+ "git_tag" : "dc0f9fc20c2a544e53099e640a681b347532391a",
"git_url" : "https://github.com/NVIDIA/cuCollections.git",
"version" : "0.0.1"
},
From 156ad0c257e71c11f0bbba83196b809265b9c101 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 23 Oct 2024 22:23:06 +0800
Subject: [PATCH 078/157] Update submodule cudf to
3126f775c527a8df65df2e2cbc8c2b73da2219bf (#2529)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index cff1296845..3126f775c5 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit cff1296845aa9a4078dd0d95dd30b7e7c004f2d9
+Subproject commit 3126f775c527a8df65df2e2cbc8c2b73da2219bf
From 7b65899ef8a74536ad44fef0f44b8244c3ad4cde Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 24 Oct 2024 06:50:45 +0800
Subject: [PATCH 079/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2530)
* Update submodule cudf to e7653a70743a76ad3c8ca4b377aa0ec4303e5556
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf e7653a70743a76ad3c8ca4b377aa0ec4303e5556
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 3126f775c5..e7653a7074 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 3126f775c527a8df65df2e2cbc8c2b73da2219bf
+Subproject commit e7653a70743a76ad3c8ca4b377aa0ec4303e5556
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 9fea155e9c..d96a081e72 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "52b672b6405f6312108263c289c3b042eb0bd50b",
+ "git_tag" : "7715e36fcd3040f70a5d1edccf28a266bd572fd5",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From f08cedfebc0f7fe54c5c4237ae7b67622a605964 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 24 Oct 2024 10:47:10 +0800
Subject: [PATCH 080/157] Update submodule cudf to
d7cdf44da2ba921c6fa63feff8749d141643f76e (#2531)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index e7653a7074..d7cdf44da2 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit e7653a70743a76ad3c8ca4b377aa0ec4303e5556
+Subproject commit d7cdf44da2ba921c6fa63feff8749d141643f76e
From d7b503583e47317a6c4f8c203306773350329cde Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 24 Oct 2024 16:54:24 +0800
Subject: [PATCH 081/157] Update pinned versions for cudf
d7cdf44da2ba921c6fa63feff8749d141643f76e (#2533)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf-pins/versions.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index d96a081e72..228094b53a 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "7715e36fcd3040f70a5d1edccf28a266bd572fd5",
+ "git_tag" : "9b077e51c778e7b05bee27fa52a5ecae62e00bb4",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From d7e66ecba6c6c22a35ac227236b48ef617c059f8 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 25 Oct 2024 02:18:43 +0800
Subject: [PATCH 082/157] Update submodule cudf to
3a623149827ec347e721dd1a18072f18b0b4bcc1 (#2535)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index d7cdf44da2..3a62314982 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit d7cdf44da2ba921c6fa63feff8749d141643f76e
+Subproject commit 3a623149827ec347e721dd1a18072f18b0b4bcc1
From 1ba93499e655af155a27dc43e3ba68dd7cf76f56 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 25 Oct 2024 06:46:49 +0800
Subject: [PATCH 083/157] Update submodule cudf to
7115f20e91a314f07333cbd5c01adc62bf2fbb0c (#2536)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 3a62314982..7115f20e91 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 3a623149827ec347e721dd1a18072f18b0b4bcc1
+Subproject commit 7115f20e91a314f07333cbd5c01adc62bf2fbb0c
From 64635ecf7f0fa1d79c1ab4e06f3112bbcac88aae Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Fri, 25 Oct 2024 18:12:40 +0800
Subject: [PATCH 084/157] Update pinned versions for cudf
7115f20e91a314f07333cbd5c01adc62bf2fbb0c (#2537)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf-pins/versions.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 228094b53a..72831a48d8 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "9b077e51c778e7b05bee27fa52a5ecae62e00bb4",
+ "git_tag" : "dde7115b7a169bcc430b811225ccbac3711d7901",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From 6f2b12c11637c1dc2cb789683452fd0d70e3c000 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 26 Oct 2024 00:51:41 +0800
Subject: [PATCH 085/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2538)
* Update submodule cudf to e98e6b9209ff8557d85cb9b828b895884b0c7b7a
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf e98e6b9209ff8557d85cb9b828b895884b0c7b7a
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 7115f20e91..e98e6b9209 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 7115f20e91a314f07333cbd5c01adc62bf2fbb0c
+Subproject commit e98e6b9209ff8557d85cb9b828b895884b0c7b7a
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index 72831a48d8..bb06629838 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "dde7115b7a169bcc430b811225ccbac3711d7901",
+ "git_tag" : "40dced5c6b9e3051722fc76554e83f405a462467",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
From c56716b420cb005385c632dfd9d5b0f22edefaa2 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Sat, 26 Oct 2024 06:46:34 +0800
Subject: [PATCH 086/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2539)
* Update submodule cudf to 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index e98e6b9209..8bc9f19ebb 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit e98e6b9209ff8557d85cb9b828b895884b0c7b7a
+Subproject commit 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index bb06629838..d333cfb05b 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1024a1250cfde7e93d26dc6d5e063e84c4a39824",
+ "git_tag" : "1ebfe0a4ee5f83a2ad54afcf99716944d20598dd",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
From 2a04c9f2e06ab722a42f2ec85f9f848d1cb377f5 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 29 Oct 2024 00:51:21 +0800
Subject: [PATCH 087/157] Update submodule cudf to
8c4d1f201043a6802598bea3dcb58fa1e061d9e5 (#2540)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 8bc9f19ebb..8c4d1f2010 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 8bc9f19ebbb57bbc9bfa98efd94c8d7f8c65d316
+Subproject commit 8c4d1f201043a6802598bea3dcb58fa1e061d9e5
From ed440b96a1734a31c88b1de50ef8e51d9211666d Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 29 Oct 2024 10:46:32 +0800
Subject: [PATCH 088/157] Update submodule cudf to
1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f (#2543)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 8c4d1f2010..1ad9fc1fee 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 8c4d1f201043a6802598bea3dcb58fa1e061d9e5
+Subproject commit 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f
From ee0716485ee15f8ad9dda0755806b35055450b5c Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Tue, 29 Oct 2024 16:52:24 +0800
Subject: [PATCH 089/157] Update submodule cudf to
bf5b778c265b3bfa712f509be0ba268216bcf3d0 (#2544)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 1ad9fc1fee..bf5b778c26 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 1ad9fc1feef0ea0ee38adaa8f05cde6bb05aff0f
+Subproject commit bf5b778c265b3bfa712f509be0ba268216bcf3d0
From 0f326603ecd19acf4cc13256257a6a9f2f6ec197 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 30 Oct 2024 00:56:04 +0800
Subject: [PATCH 090/157] Update submodule cudf to
3775f7b9f6509bd0f2f75c46edb60abf2522de86 (#2545)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index bf5b778c26..3775f7b9f6 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit bf5b778c265b3bfa712f509be0ba268216bcf3d0
+Subproject commit 3775f7b9f6509bd0f2f75c46edb60abf2522de86
From 02a5b34a037a1fc7be24b2de976303e7a2bca4d0 Mon Sep 17 00:00:00 2001
From: Zach Puller
Date: Tue, 29 Oct 2024 12:40:56 -0700
Subject: [PATCH 091/157] fix max bytes dealloc bug (#2541)
Signed-off-by: Zach Puller
---
src/main/cpp/src/SparkResourceAdaptorJni.cpp | 2 +-
.../java/com/nvidia/spark/rapids/jni/RmmSparkTest.java | 7 ++++++-
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/main/cpp/src/SparkResourceAdaptorJni.cpp b/src/main/cpp/src/SparkResourceAdaptorJni.cpp
index 8eeb047ddc..e09ef0dfdb 100644
--- a/src/main/cpp/src/SparkResourceAdaptorJni.cpp
+++ b/src/main/cpp/src/SparkResourceAdaptorJni.cpp
@@ -1780,6 +1780,7 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource {
auto const thread = threads.find(tid);
if (thread != threads.end()) {
log_status("DEALLOC", tid, thread->second.task_id, thread->second.state);
+ if (!is_for_cpu) { thread->second.gpu_memory_allocated_bytes -= num_bytes; }
} else {
log_status("DEALLOC", tid, -2, thread_state::UNKNOWN);
}
@@ -1802,7 +1803,6 @@ class spark_resource_adaptor final : public rmm::mr::device_memory_resource {
if (is_for_cpu == t_state.is_cpu_alloc) {
transition(t_state, thread_state::THREAD_ALLOC_FREE);
}
- if (!is_for_cpu) { t_state.gpu_memory_allocated_bytes -= num_bytes; }
break;
default: break;
}
diff --git a/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java b/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java
index 987dd58534..270a4266cd 100644
--- a/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java
+++ b/src/test/java/com/nvidia/spark/rapids/jni/RmmSparkTest.java
@@ -360,7 +360,7 @@ public void testInsertOOMsGpu() {
assertThrows(GpuSplitAndRetryOOM.class, () -> Rmm.alloc(100).close());
assertEquals(0, RmmSpark.getAndResetNumRetryThrow(taskid));
assertEquals(1, RmmSpark.getAndResetNumSplitRetryThrow(taskid));
- assertEquals(ALIGNMENT * 2, RmmSpark.getAndResetGpuMaxMemoryAllocated(taskid));
+ assertEquals(ALIGNMENT, RmmSpark.getAndResetGpuMaxMemoryAllocated(taskid));
// Verify that injecting OOM does not cause the block to actually happen
assertEquals(RmmSparkThreadState.THREAD_RUNNING, RmmSpark.getStateOf(threadId));
@@ -818,6 +818,11 @@ public void testBasicMixedBlocking() throws ExecutionException, InterruptedExcep
secondGpuAlloc.waitForAlloc();
secondGpuAlloc.freeAndWait();
}
+ // Do one more alloc after freeing on same task to show the max allocation metric is unimpacted
+ try (AllocOnAnotherThread secondGpuAlloc = new GpuAllocOnAnotherThread(taskThree, FIVE_MB)) {
+ secondGpuAlloc.waitForAlloc();
+ secondGpuAlloc.freeAndWait();
+ }
}
}
} finally {
From 47e1738c0936491779dc4a0811a4a905ddee1bc3 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 30 Oct 2024 08:22:40 +0800
Subject: [PATCH 092/157] [submodule-sync] bot-submodule-sync-branch-24.12 to
branch-24.12 [skip ci] [bot] (#2546)
* Update submodule cudf to 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
* Update pinned versions for cudf 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---------
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 3775f7b9f6..8d7b0d8bf0 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 3775f7b9f6509bd0f2f75c46edb60abf2522de86
+Subproject commit 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index d333cfb05b..371ab5b422 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -44,7 +44,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "40dced5c6b9e3051722fc76554e83f405a462467",
+ "git_tag" : "10bc842eb9abb7af272f7d31fbeef310743ad062",
"git_url" : "https://github.com/rapidsai/kvikio.git",
"version" : "24.12"
},
@@ -149,7 +149,7 @@
{
"always_download" : true,
"git_shallow" : false,
- "git_tag" : "1ebfe0a4ee5f83a2ad54afcf99716944d20598dd",
+ "git_tag" : "47dae24b5578894ac0efc3c06930b7a5a069d988",
"git_url" : "https://github.com/rapidsai/rmm.git",
"version" : "24.12"
},
From 6ccc96f432cf500d397ee3f9c67cc728e9085883 Mon Sep 17 00:00:00 2001
From: Renjie Liu
Date: Wed, 30 Oct 2024 09:19:21 +0800
Subject: [PATCH 093/157] Add utility methods for kudo (#2542)
Signed-off-by: liurenjie1024
---
.../com/nvidia/spark/rapids/jni/Arms.java | 84 +++++++++++++++++++
.../com/nvidia/spark/rapids/jni/Pair.java | 42 ++++++++++
.../spark/rapids/jni/Preconditions.java | 42 ++++++++++
3 files changed, 168 insertions(+)
create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Arms.java
create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Pair.java
create mode 100644 src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Arms.java b/src/main/java/com/nvidia/spark/rapids/jni/Arms.java
new file mode 100644
index 0000000000..4492711b0c
--- /dev/null
+++ b/src/main/java/com/nvidia/spark/rapids/jni/Arms.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.jni;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.function.Function;
+
+/**
+ * Utility methods for automatic resource management: closing a resource on failure and closing groups of resources.
+ */
+class Arms {
+ /**
+ * Applies the function to the resource and returns its result; if the function throws, closes the non-null resource and rethrows.
+ */
+ public static T closeIfException(R resource, Function function) {
+ try {
+ return function.apply(resource);
+ } catch (Exception e) {
+ if (resource != null) {
+ try {
+ resource.close();
+ } catch (Exception inner) {
+ e.addSuppressed(inner); // the function's exception stays primary; the close failure rides along
+ }
+ }
+ throw e;
+ }
+ }
+
+ /**
+ * Safely closes all the resources produced by the iterator.
+ *
+ * Every resource is visited and closed even if an earlier one fails. The first exception thrown during the
+ * traversal is captured, and any later exceptions are added to it as suppressed exceptions.
+ *
+ * @throws RuntimeException wrapping the first captured exception, once all resources have been visited
+ */
+ public static void closeAll(Iterator resources) {
+ Throwable t = null;
+ while (resources.hasNext()) {
+ try {
+ resources.next().close();
+ } catch (Exception e) {
+ if (t == null) {
+ t = e;
+ } else {
+ t.addSuppressed(e);
+ }
+ }
+ }
+
+ if (t != null) throw new RuntimeException(t);
+ }
+
+
+ /**
+ * Safely closes all the given resources by wrapping them in a list. See {@link #closeAll(Iterator)} for more details.
+ */
+ public static void closeAll(R... resources) {
+ closeAll(Arrays.asList(resources));
+ }
+
+ /**
+ * Safely closes every resource in the collection via its iterator. See {@link #closeAll(Iterator)} for more details.
+ */
+ public static void closeAll(Collection resources) {
+ closeAll(resources.iterator());
+ }
+}
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Pair.java b/src/main/java/com/nvidia/spark/rapids/jni/Pair.java
new file mode 100644
index 0000000000..8a0b4b0fee
--- /dev/null
+++ b/src/main/java/com/nvidia/spark/rapids/jni/Pair.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.jni;
+
+/**
+ * A utility class holding a pair of values, "left" and "right", that are set once at construction and only read afterwards.
+ */
+class Pair {
+ private final K left;
+ private final V right;
+
+ public Pair(K left, V right) {
+ this.left = left;
+ this.right = right;
+ }
+
+ public K getLeft() {
+ return left;
+ }
+
+ public V getRight() {
+ return right;
+ }
+
+ public static Pair of(K left, V right) { // static factory; simply delegates to the constructor
+ return new Pair<>(left, right);
+ }
+}
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java b/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java
new file mode 100644
index 0000000000..67473a2e61
--- /dev/null
+++ b/src/main/java/com/nvidia/spark/rapids/jni/Preconditions.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.jni;
+
+import java.util.function.Supplier;
+
+/**
+ * This class contains utility methods for checking preconditions; a failed check raises an IllegalStateException.
+ */
+class Preconditions {
+ /**
+ * Does nothing if the condition is true; otherwise throws an IllegalStateException with the given message.
+ */
+ public static void ensure(boolean condition, String message) {
+ if (!condition) {
+ throw new IllegalStateException(message);
+ }
+ }
+
+ /**
+ * Same check as above, but the message is obtained from the supplier only when the condition is false.
+ */
+ public static void ensure(boolean condition, Supplier messageSupplier) {
+ if (!condition) {
+ throw new IllegalStateException(messageSupplier.get());
+ }
+ }
+}
From d0295590dc98e1246da45963ce6f69b46f44ebda Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 30 Oct 2024 10:53:19 +0800
Subject: [PATCH 094/157] Update submodule cudf to
eeb4d2780163794f4b705062e49dbdc3283ebce0 (#2547)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index 8d7b0d8bf0..eeb4d27801 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 8d7b0d8bf0aebebde0a5036d2e51f5991ecbe63b
+Subproject commit eeb4d2780163794f4b705062e49dbdc3283ebce0
From b0ed7345c111f78c9e734c7108170a7cba8be4f2 Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Wed, 30 Oct 2024 22:17:11 +0800
Subject: [PATCH 095/157] Update submodule cudf to
6328ad679947eb5cbc352c345a28f079aa6b8005 (#2549)
Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
thirdparty/cudf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/thirdparty/cudf b/thirdparty/cudf
index eeb4d27801..6328ad6799 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit eeb4d2780163794f4b705062e49dbdc3283ebce0
+Subproject commit 6328ad679947eb5cbc352c345a28f079aa6b8005
From c8ff5d638c85cd5af23f60abb968dceb0a381818 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Wed, 30 Oct 2024 14:06:44 -0500
Subject: [PATCH 096/157] Upmerge to a new version of CUDF with a new version
of nvcomp (#2550)
Signed-off-by: Robert (Bobby) Evans
---
ci/submodule-sync.sh | 8 -
patches/noop.patch | 0
patches/revert_nvcomp4.patch | 939 -----------------------------
thirdparty/cudf | 2 +-
thirdparty/cudf-pins/versions.json | 12 +-
5 files changed, 7 insertions(+), 954 deletions(-)
create mode 100644 patches/noop.patch
delete mode 100644 patches/revert_nvcomp4.patch
diff --git a/ci/submodule-sync.sh b/ci/submodule-sync.sh
index ea7c06b7ec..7f3e468862 100755
--- a/ci/submodule-sync.sh
+++ b/ci/submodule-sync.sh
@@ -91,14 +91,6 @@ rapids_cmake_sha=$(git -C ${LIBCUDF_BUILD_PATH}/_deps/rapids-cmake-src/ rev-pars
echo "Update rapids-cmake pinned SHA1 to ${rapids_cmake_sha}"
echo "${rapids_cmake_sha}" > thirdparty/cudf-pins/rapids-cmake.sha
-# Bash the wrong nvcomp version to the correct version until
-# nvcomp version mismatch is fixed. https://github.com/rapidsai/cudf/issues/16772.
-echo "Revert nvcomp to 3.0.6"
-sed -i -e 's/4\.0\.1\.0/3.0.6/' \
- -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-sbsa-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_SBSA_${cuda-toolkit-version-mapping}.tgz|' \
- -e 's|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp-linux-x86_64-${version}-cuda${cuda-toolkit-version-mapping}.tar.gz|https://developer.download.nvidia.com/compute/nvcomp/${version}/local_installers/nvcomp_${version}_x86_64_${cuda-toolkit-version-mapping}.tgz|' \
- thirdparty/cudf-pins/versions.json
-
# Do the git add after the build so that we get
# the updated versions.json generated by the build
echo "Update cudf submodule to ${cudf_sha} with updated pinned versions"
diff --git a/patches/noop.patch b/patches/noop.patch
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/patches/revert_nvcomp4.patch b/patches/revert_nvcomp4.patch
deleted file mode 100644
index 914c033088..0000000000
--- a/patches/revert_nvcomp4.patch
+++ /dev/null
@@ -1,939 +0,0 @@
-diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh
-index fb93b06dbe..e5565c4b53 100755
---- a/ci/build_wheel_cudf.sh
-+++ b/ci/build_wheel_cudf.sh
-@@ -23,6 +23,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
- python -m auditwheel repair \
- --exclude libcudf.so \
- --exclude libnvcomp.so \
-+ --exclude libnvcomp_bitcomp.so \
-+ --exclude libnvcomp_gdeflate.so \
- -w ${package_dir}/final_dist \
- ${package_dir}/dist/*
-
-diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh
-index 5e9f7f8a0c..0e4745bda2 100755
---- a/ci/build_wheel_pylibcudf.sh
-+++ b/ci/build_wheel_pylibcudf.sh
-@@ -21,6 +21,8 @@ export PIP_CONSTRAINT="/tmp/constraints.txt"
- python -m auditwheel repair \
- --exclude libcudf.so \
- --exclude libnvcomp.so \
-+ --exclude libnvcomp_bitcomp.so \
-+ --exclude libnvcomp_gdeflate.so \
- -w ${package_dir}/final_dist \
- ${package_dir}/dist/*
-
-diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
-index bd5e6c3d56..74ca3fda1a 100644
---- a/conda/environments/all_cuda-118_arch-x86_64.yaml
-+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
-@@ -58,7 +58,7 @@ dependencies:
- - numpy>=1.23,<3.0a0
- - numpydoc
- - nvcc_linux-64=11.8
--- nvcomp==4.0.1
-+- nvcomp==3.0.6
- - nvtx>=0.2.1
- - openpyxl
- - packaging
-diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
-index 565a3ebfa3..22619acf4a 100644
---- a/conda/environments/all_cuda-125_arch-x86_64.yaml
-+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
-@@ -56,7 +56,7 @@ dependencies:
- - numba-cuda>=0.0.13
- - numpy>=1.23,<3.0a0
- - numpydoc
--- nvcomp==4.0.1
-+- nvcomp==3.0.6
- - nvtx>=0.2.1
- - openpyxl
- - packaging
-@@ -67,7 +67,7 @@ dependencies:
- - pre-commit
- - pyarrow>=14.0.0,<18.0.0a0
- - pydata-sphinx-theme!=0.14.2
--- pynvjitlink>=0.0.0a0
-+- pynvjitlink
- - pytest-benchmark
- - pytest-cases>=3.8.2
- - pytest-cov
-diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
-index dc75eb4b25..67d501d746 100644
---- a/conda/recipes/libcudf/conda_build_config.yaml
-+++ b/conda/recipes/libcudf/conda_build_config.yaml
-@@ -35,7 +35,7 @@ spdlog_version:
- - ">=1.14.1,<1.15"
-
- nvcomp_version:
-- - "=4.0.1"
-+ - "=3.0.6"
-
- zlib_version:
- - ">=1.2.13"
-diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp
-index 0d74a4158a..f3260d0cb5 100644
---- a/cpp/include/cudf/io/nvcomp_adapter.hpp
-+++ b/cpp/include/cudf/io/nvcomp_adapter.hpp
-@@ -24,7 +24,7 @@
- namespace CUDF_EXPORT cudf {
- namespace io::nvcomp {
-
--enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP };
-+enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 };
-
- /**
- * @brief Set of parameters that impact whether nvCOMP features are enabled.
-@@ -36,20 +36,33 @@ struct feature_status_parameters {
- int lib_patch_version; ///< patch version
- bool are_all_integrations_enabled; ///< all integrations
- bool are_stable_integrations_enabled; ///< stable integrations
-+ int compute_capability_major; ///< cuda compute major version
-
- /**
-- * @brief Default constructor using the current version of nvcomp and current environment
-- * variables
-+ * @brief Default Constructor
- */
- feature_status_parameters();
-
- /**
-- * @brief Constructor using the current version of nvcomp
-+ * @brief feature_status_parameters Constructor
- *
-+ * @param major positive integer representing major value of nvcomp
-+ * @param minor positive integer representing minor value of nvcomp
-+ * @param patch positive integer representing patch value of nvcomp
- * @param all_enabled if all integrations are enabled
- * @param stable_enabled if stable integrations are enabled
-+ * @param cc_major CUDA compute capability
- */
-- feature_status_parameters(bool all_enabled, bool stable_enabled);
-+ feature_status_parameters(
-+ int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major)
-+ : lib_major_version{major},
-+ lib_minor_version{minor},
-+ lib_patch_version{patch},
-+ are_all_integrations_enabled{all_enabled},
-+ are_stable_integrations_enabled{stable_enabled},
-+ compute_capability_major{cc_major}
-+ {
-+ }
- };
-
- /**
-@@ -61,7 +74,8 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para
- lhs.lib_minor_version == rhs.lib_minor_version and
- lhs.lib_patch_version == rhs.lib_patch_version and
- lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and
-- lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled;
-+ lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and
-+ lhs.compute_capability_major == rhs.compute_capability_major;
- }
-
- /**
-diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp
-index c3187f73a9..3191e8f015 100644
---- a/cpp/src/io/comp/nvcomp_adapter.cpp
-+++ b/cpp/src/io/comp/nvcomp_adapter.cpp
-@@ -22,46 +22,94 @@
- #include
- #include
-
--#include
--#include
- #include
- #include
--#include
-
- #include
-
-+#define NVCOMP_DEFLATE_HEADER
-+#if __has_include(NVCOMP_DEFLATE_HEADER)
-+#include NVCOMP_DEFLATE_HEADER
-+#endif
-+
-+#define NVCOMP_ZSTD_HEADER
-+#if __has_include(NVCOMP_ZSTD_HEADER)
-+#include NVCOMP_ZSTD_HEADER
-+#endif
-+
-+// When building with nvcomp 4.0 or newer, map the new version macros to the old ones
-+#ifndef NVCOMP_MAJOR_VERSION
-+#define NVCOMP_MAJOR_VERSION NVCOMP_VER_MAJOR
-+#define NVCOMP_MINOR_VERSION NVCOMP_VER_MINOR
-+#define NVCOMP_PATCH_VERSION NVCOMP_VER_PATCH
-+#endif
-+
-+#define NVCOMP_HAS_ZSTD_DECOMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 3))
-+
-+#define NVCOMP_HAS_ZSTD_COMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 4))
-+
-+#define NVCOMP_HAS_DEFLATE(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 5))
-+
-+#define NVCOMP_HAS_DECOMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) \
-+ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 1))
-+
-+#define NVCOMP_HAS_COMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 6))
-+
-+// ZSTD is stable for nvcomp 2.3.2 or newer
-+#define NVCOMP_ZSTD_DECOMP_IS_STABLE(MAJOR, MINOR, PATCH) \
-+ (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 2))
-+
- namespace cudf::io::nvcomp {
-
- // Dispatcher for nvcompBatchedDecompressGetTempSizeEx
- template
--auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&... args)
-+std::optional batched_decompress_get_temp_size_ex(compression_type compression,
-+ Args&&... args)
- {
-+#if NVCOMP_HAS_DECOMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
- switch (compression) {
- case compression_type::SNAPPY:
- return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward(args)...);
- case compression_type::ZSTD:
-+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
- return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward