diff --git a/docs/get-started/xgboost-examples/csp/databricks/databricks.md b/docs/get-started/xgboost-examples/csp/databricks/databricks.md index 35ae7fe8b..d9cd9f013 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/databricks.md +++ b/docs/get-started/xgboost-examples/csp/databricks/databricks.md @@ -43,7 +43,7 @@ cluster. - [Databricks 9.1 LTS ML](https://docs.databricks.com/release-notes/runtime/9.1ml.html#system-environment) has CUDA 11 - installed. Users will need to use 22.02.0 or later on Databricks 9.1 LTS ML. In this case use + installed. Users will need to use 21.12.0 or later on Databricks 9.1 LTS ML. In this case use [generate-init-script.ipynb](generate-init-script.ipynb) which will install the RAPIDS Spark plugin. diff --git a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb index 5ec91ef90..2ef2dee46 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb +++ b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script-10.4.ipynb @@ -26,8 +26,8 @@ "cd ../../dbfs/FileStore/jars/\n", "sudo wget -O cudf-22.04.0-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar\n", "sudo wget -O rapids-4-spark_2.12-22.04.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar\n", - "sudo wget -O xgboost4j_3.0-1.4.2-0.2.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.2.0/xgboost4j_3.0-1.4.2-0.2.0.jar\n", - "sudo wget -O xgboost4j-spark_3.0-1.4.2-0.2.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.2.0/xgboost4j-spark_3.0-1.4.2-0.2.0.jar\n", + "sudo wget -O xgboost4j_3.0-1.4.2-0.3.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.3.0/xgboost4j_3.0-1.4.2-0.3.0.jar\n", + "sudo wget -O xgboost4j-spark_3.0-1.4.2-0.3.0.jar 
https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.3.0/xgboost4j-spark_3.0-1.4.2-0.3.0.jar\n", "ls -ltr\n", "\n", "# Your Jars are downloaded in dbfs:/FileStore/jars directory" @@ -57,10 +57,10 @@ "source": [ "dbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n", "#!/bin/bash\n", - "sudo cp /dbfs/FileStore/jars/xgboost4j_3.0-1.4.2-0.2.0.jar /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.5.2.jar\n", + "sudo cp /dbfs/FileStore/jars/xgboost4j_3.0-1.4.2-0.3.0.jar /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.5.2.jar\n", "sudo cp /dbfs/FileStore/jars/cudf-22.04.0-cuda11.jar /databricks/jars/\n", "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.04.0.jar /databricks/jars/\n", - "sudo cp /dbfs/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.2.0.jar /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar\"\"\", True)" + "sudo cp /dbfs/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.3.0.jar /databricks/jars/spark--maven-trees--ml--10.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.5.2.jar\"\"\", True)" ] }, { @@ -131,7 +131,7 @@ "\n", "1. Edit your cluster, adding an initialization script from `dbfs:/databricks/init_scripts/init.sh` in the \"Advanced Options\" under \"Init Scripts\" tab\n", "2. Reboot the cluster\n", - "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.2.0.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", + "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.3.0.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", "4. 
Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.04/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", "5. Inside the mortgage example notebook, update the data paths\n", " `train_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-train.csv')`\n", diff --git a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb index ab613b79d..9645b1296 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb +++ b/docs/get-started/xgboost-examples/csp/databricks/generate-init-script.ipynb @@ -24,10 +24,10 @@ "source": [ "%sh\n", "cd ../../dbfs/FileStore/jars/\n", - "sudo wget -O cudf-22.02.0-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar\n", - "sudo wget -O rapids-4-spark_2.12-22.02.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar\n", - "sudo wget -O xgboost4j_3.0-1.4.2-0.2.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.2.0/xgboost4j_3.0-1.4.2-0.2.0.jar\n", - "sudo wget -O xgboost4j-spark_3.0-1.4.2-0.2.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.2.0/xgboost4j-spark_3.0-1.4.2-0.2.0.jar\n", + "sudo wget -O cudf-22.04.0-cuda11.jar https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar\n", + "sudo wget -O rapids-4-spark_2.12-22.04.0.jar https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar\n", + "sudo wget -O xgboost4j_3.0-1.4.2-0.3.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.3.0/xgboost4j_3.0-1.4.2-0.3.0.jar\n", + "sudo wget -O xgboost4j-spark_3.0-1.4.2-0.3.0.jar https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.3.0/xgboost4j-spark_3.0-1.4.2-0.3.0.jar\n", "ls -ltr\n", 
"\n", "# Your Jars are downloaded in dbfs:/FileStore/jars directory" @@ -57,10 +57,10 @@ "source": [ "dbutils.fs.put(\"/databricks/init_scripts/init.sh\",\"\"\"\n", "#!/bin/bash\n", - "sudo cp /dbfs/FileStore/jars/xgboost4j_3.0-1.4.2-0.2.0.jar /databricks/jars/spark--maven-trees--ml--9.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.4.1.jar\n", - "sudo cp /dbfs/FileStore/jars/cudf-22.02.0-cuda11.jar /databricks/jars/\n", - "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.02.0.jar /databricks/jars/\n", - "sudo cp /dbfs/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.2.0.jar /databricks/jars/spark--maven-trees--ml--9.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.4.1.jar\"\"\", True)" + "sudo cp /dbfs/FileStore/jars/xgboost4j_3.0-1.4.2-0.3.0.jar /databricks/jars/spark--maven-trees--ml--9.x--xgboost-gpu--ml.dmlc--xgboost4j-gpu_2.12--ml.dmlc__xgboost4j-gpu_2.12__1.4.1.jar\n", + "sudo cp /dbfs/FileStore/jars/cudf-22.04.0-cuda11.jar /databricks/jars/\n", + "sudo cp /dbfs/FileStore/jars/rapids-4-spark_2.12-22.04.0.jar /databricks/jars/\n", + "sudo cp /dbfs/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.3.0.jar /databricks/jars/spark--maven-trees--ml--9.x--xgboost-gpu--ml.dmlc--xgboost4j-spark-gpu_2.12--ml.dmlc__xgboost4j-spark-gpu_2.12__1.4.1.jar\"\"\", True)" ] }, { @@ -131,8 +131,8 @@ "\n", "1. Edit your cluster, adding an initialization script from `dbfs:/databricks/init_scripts/init.sh` in the \"Advanced Options\" under \"Init Scripts\" tab\n", "2. Reboot the cluster\n", - "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.2.0.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", - "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.02/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", + "3. 
Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark_3.0-1.4.2-0.3.0.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", + "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-22.04/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", "5. Inside the mortgage example notebook, update the data paths\n", " `train_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-train.csv')`\n", " `trans_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-trans.csv')`" diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md index 8b850d028..efb960224 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes-scala.md @@ -40,7 +40,7 @@ export SPARK_DOCKER_IMAGE= export SPARK_DOCKER_TAG= pushd ${SPARK_HOME} -wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-22.02/dockerfile/Dockerfile +wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-22.04/dockerfile/Dockerfile # Optionally install additional jars into ${SPARK_HOME}/jars/ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md index 8b4c0d279..199e33d04 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-python.md @@ -95,8 +95,6 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.task.resource.gpu.amount=1 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ - --conf spark.rapids.sql.csv.read.integer.enabled=true \ - --conf 
spark.rapids.sql.csv.read.long.enabled=true \ --conf spark.rapids.sql.csv.read.double.enabled=true \ --py-files ${SAMPLE_ZIP} \ main.py \ diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md index 8a9d7ff85..8cf15aaba 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/standalone-scala.md @@ -104,8 +104,6 @@ ${SPARK_HOME}/bin/spark-submit \ --conf spark.task.resource.gpu.amount=1 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.incompatibleDateFormats.enabled=true \ - --conf spark.rapids.sql.csv.read.integer.enabled=true \ - --conf spark.rapids.sql.csv.read.long.enabled=true \ --conf spark.rapids.sql.csv.read.double.enabled=true \ --class com.nvidia.spark.examples.mortgage.ETLMain \ $SAMPLE_JAR \ diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md index 94cf6f9e1..1b0edd3d7 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-python.md @@ -5,15 +5,15 @@ For simplicity export the location to these jars. All examples assume the packag ### Download the jars 1. Download the XGBoost for Apache Spark jars - * [XGBoost4j Package](https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.2.0/) - * [XGBoost4j-Spark Package](https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.2.0/) + * [XGBoost4j Package](https://repo1.maven.org/maven2/com/nvidia/xgboost4j_3.0/1.4.2-0.3.0/) + * [XGBoost4j-Spark Package](https://repo1.maven.org/maven2/com/nvidia/xgboost4j-spark_3.0/1.4.2-0.3.0/) 2. 
Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar) Then download the version of the cudf jar that your version of the accelerator depends on. - * [cuDF Package](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) + * [cuDF Package](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar) ### Build XGBoost Python Examples @@ -29,10 +29,10 @@ You need to download Mortgage dataset to `/opt/xgboost` from this [site](https:/ ``` bash export SPARK_XGBOOST_DIR=/opt/xgboost -export CUDF_JAR=${SPARK_XGBOOST_DIR}/cudf-22.02.0-cuda11.jar -export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.02.0.jar -export XGBOOST4J_JAR=${SPARK_XGBOOST_DIR}/xgboost4j_3.0-1.4.2-0.2.0.jar -export XGBOOST4J_SPARK_JAR=${SPARK_XGBOOST_DIR}/xgboost4j-spark_3.0-1.4.2-0.2.0.jar +export CUDF_JAR=${SPARK_XGBOOST_DIR}/cudf-22.04.0-cuda11.jar +export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.04.0.jar +export XGBOOST4J_JAR=${SPARK_XGBOOST_DIR}/xgboost4j_3.0-1.4.2-0.3.0.jar +export XGBOOST4J_SPARK_JAR=${SPARK_XGBOOST_DIR}/xgboost4j-spark_3.0-1.4.2-0.3.0.jar export SAMPLE_ZIP=${SPARK_XGBOOST_DIR}/samples.zip export MAIN_PY=${SPARK_XGBOOST_DIR}/main.py ``` diff --git a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md index 7de6fb185..c77c3aead 100644 --- a/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md +++ b/docs/get-started/xgboost-examples/prepare-package-data/preparation-scala.md @@ -5,11 +5,11 @@ For simplicity export the location to these jars. All examples assume the packag ### Download the jars 1. 
Download the RAPIDS Accelerator for Apache Spark plugin jar - * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) + * [RAPIDS Spark Package](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar) Then download the version of the cudf jar that your version of the accelerator depends on. - * [cuDF Package](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) + * [cuDF Package](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar) ### Build XGBoost Scala Examples @@ -25,7 +25,7 @@ You need to download mortgage dataset to `/opt/xgboost` from this [site](https:/ ``` bash export SPARK_XGBOOST_DIR=/opt/xgboost -export CUDF_JAR=${SPARK_XGBOOST_DIR}/cudf-22.02.0-cuda11.jar -export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.02.0.jar +export CUDF_JAR=${SPARK_XGBOOST_DIR}/cudf-22.04.0-cuda11.jar +export RAPIDS_JAR=${SPARK_XGBOOST_DIR}/rapids-4-spark_2.12-22.04.0.jar export SAMPLE_JAR=${SPARK_XGBOOST_DIR}/sample_xgboost_apps-0.2.2-jar-with-dependencies.jar ``` diff --git a/examples/RAPIDS-accelerated-UDFs/README.md b/examples/RAPIDS-accelerated-UDFs/README.md index 1ba332cb5..37c6fb5ed 100644 --- a/examples/RAPIDS-accelerated-UDFs/README.md +++ b/examples/RAPIDS-accelerated-UDFs/README.md @@ -108,8 +108,8 @@ See above Prerequisites section First finish the steps in "Building with Native Code Examples and run test cases" section, then do the following in the docker. 
### Get jars from Maven Central -[cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) -[rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) +[cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar) +[rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar) ### Launch a local mode Spark diff --git a/examples/RAPIDS-accelerated-UDFs/pom.xml b/examples/RAPIDS-accelerated-UDFs/pom.xml index 0691ff78b..9a7c9a2a7 100644 --- a/examples/RAPIDS-accelerated-UDFs/pom.xml +++ b/examples/RAPIDS-accelerated-UDFs/pom.xml @@ -37,9 +37,9 @@ cuda11 2.12 - 22.02.0 + 22.04.0 - 22.02.0 + 22.04.0 3.1.1 2.12.15 ${project.build.directory}/cpp-build diff --git a/examples/Spark-ETL+XGBoost/aggregator/dependency-reduced-pom.xml b/examples/Spark-ETL+XGBoost/aggregator/dependency-reduced-pom.xml index 6a41d7094..5e5d20fe7 100644 --- a/examples/Spark-ETL+XGBoost/aggregator/dependency-reduced-pom.xml +++ b/examples/Spark-ETL+XGBoost/aggregator/dependency-reduced-pom.xml @@ -34,13 +34,13 @@ com.nvidia xgboost4j_3.0 - 1.4.2-0.2.0 + 1.4.2-0.3.0 compile com.nvidia xgboost4j-spark_3.0 - 1.4.2-0.2.0 + 1.4.2-0.3.0 compile diff --git a/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb b/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb index 48d19ee25..2e9466fcb 100644 --- a/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb +++ b/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL+XGBoost.ipynb @@ -624,9 +624,7 @@ "spark.conf.set(\"spark.rapids.sql.incompatibleDateFormats.enabled\", \"true\")\n", "spark.conf.set(\"spark.rapids.sql.hasNans\", \"false\")\n", "# use GPU to read CSV\n", - "spark.conf.set(\"spark.rapids.sql.csv.read.long.enabled\", 
\"true\")\n", - "spark.conf.set(\"spark.rapids.sql.csv.read.double.enabled\", \"true\")\n", - "spark.conf.set(\"spark.rapids.sql.csv.read.integer.enabled\", \"true\")" + "spark.conf.set(\"spark.rapids.sql.csv.read.double.enabled\", \"true\")" ] }, { diff --git a/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL.ipynb b/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL.ipynb index 7d192f5f1..5986ded5c 100644 --- a/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL.ipynb +++ b/examples/Spark-ETL+XGBoost/mortgage/notebooks/python/MortgageETL.ipynb @@ -9,8 +9,8 @@ "All data could be found at https://docs.rapids.ai/datasets/mortgage-data\n", "\n", "### 2. Download needed jars\n", - "* [cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/)\n", - "* [rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar)\n", + "* [cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/)\n", + "* [rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar)\n", "\n", "\n", "### 3. Start Spark Standalone\n", @@ -18,7 +18,7 @@ "\n", "### 4. 
Add ENV\n", "```\n", - "$ export SPARK_JARS=cudf-22.02.0-cuda11.jar,rapids-4-spark_2.12-22.02.0.jar\n", + "$ export SPARK_JARS=cudf-22.04.0-cuda11.jar,rapids-4-spark_2.12-22.04.0.jar\n", "$ export PYSPARK_DRIVER_PYTHON=jupyter \n", "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n", "```\n", @@ -30,8 +30,6 @@ "--jars ${SPARK_JARS} \\\n", "--conf spark.plugins=com.nvidia.spark.SQLPlugin \\\n", "--conf spark.rapids.sql.incompatibleDateFormats.enabled=true \\\n", - "--conf spark.rapids.sql.csv.read.integer.enabled=true \\\n", - "--conf spark.rapids.sql.csv.read.long.enabled=true \\\n", "--conf spark.rapids.sql.csv.read.double.enabled=true \\\n", "--py-files ${SPARK_PY_FILES}\n", "```\n", diff --git a/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage-ETL.ipynb b/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage-ETL.ipynb index fea67cc5c..b768709cf 100644 --- a/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage-ETL.ipynb +++ b/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage-ETL.ipynb @@ -19,15 +19,15 @@ "All data could be found at https://docs.rapids.ai/datasets/mortgage-data\n", "\n", "### 2. Download needed jars\n", - "* [cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/)\n", - "* [rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar)\n", + "* [cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/)\n", + "* [rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before Running the script, please setup Spark standalone mode\n", "\n", "### 4. 
Add ENV\n", "```\n", - "$ export SPARK_JARS=cudf-22.02.0-cuda11.jar,rapids-4-spark_2.12-22.02.0.jar\n", + "$ export SPARK_JARS=cudf-22.04.0-cuda11.jar,rapids-4-spark_2.12-22.04.0.jar\n", "\n", "```\n", "\n", diff --git a/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb b/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb index 174a82760..97c0e4f5a 100644 --- a/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb +++ b/examples/Spark-ETL+XGBoost/mortgage/notebooks/scala/mortgage_gpu_crossvalidation.ipynb @@ -160,10 +160,10 @@ "```scala\n", "import org.apache.spark.sql.SparkSession\n", "val spark = SparkSession.builder().appName(\"Taxi-GPU\").getOrCreate\n", - "%AddJar file:/data/libs/cudf-22.02.0-cuda11.jar\n", - "%AddJar file:/data/libs/xgboost4j_3.0-1.4.2-0.2.0.jar\n", - "%AddJar file:/data/libs/xgboost4j-spark_3.0-1.4.2-0.2.0.jar\n", - "%AddJar file:/data/libs/rapids-4-spark_2.12-22.02.0.jar\n", + "%AddJar file:/data/libs/cudf-22.04.0-cuda11.jar\n", + "%AddJar file:/data/libs/xgboost4j_3.0-1.4.2-0.3.0.jar\n", + "%AddJar file:/data/libs/xgboost4j-spark_3.0-1.4.2-0.3.0.jar\n", + "%AddJar file:/data/libs/rapids-4-spark_2.12-22.04.0.jar\n", "// ...\n", "```" ] diff --git a/examples/Spark-ETL+XGBoost/pom.xml b/examples/Spark-ETL+XGBoost/pom.xml index 48cab9f90..9a8dc084f 100644 --- a/examples/Spark-ETL+XGBoost/pom.xml +++ b/examples/Spark-ETL+XGBoost/pom.xml @@ -38,7 +38,7 @@ UTF-8 - 1.4.2-0.2.0 + 1.4.2-0.3.0 3.1.1 2.12.8 2.12 diff --git a/examples/Spark-ETL+XGBoost/taxi/notebooks/python/taxi-ETL.ipynb b/examples/Spark-ETL+XGBoost/taxi/notebooks/python/taxi-ETL.ipynb index 14c9fda2b..806955ead 100644 --- a/examples/Spark-ETL+XGBoost/taxi/notebooks/python/taxi-ETL.ipynb +++ b/examples/Spark-ETL+XGBoost/taxi/notebooks/python/taxi-ETL.ipynb @@ -19,15 +19,15 @@ "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n", "\n", "### 
2. Download needed jars\n", - "* [cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/)\n", - "* [rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar)\n", + "* [cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/)\n", + "* [rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=cudf-22.02.0-cuda11.jar,rapids-4-spark_2.12-22.02.0.jar\n", + "$ export SPARK_JARS=cudf-22.04.0-cuda11.jar,rapids-4-spark_2.12-22.04.0.jar\n", "$ export PYSPARK_DRIVER_PYTHON=jupyter \n", "$ export PYSPARK_DRIVER_PYTHON_OPTS=notebook\n", "```\n", @@ -39,8 +39,6 @@ "--jars ${SPARK_JARS} \\\n", "--conf spark.plugins=com.nvidia.spark.SQLPlugin \\\n", "--conf spark.rapids.sql.incompatibleDateFormats.enabled=true \\\n", - "--conf spark.rapids.sql.csv.read.integer.enabled=true \\\n", - "--conf spark.rapids.sql.csv.read.long.enabled=true \\\n", "--conf spark.rapids.sql.csv.read.double.enabled=true \\\n", "--py-files ${SPARK_PY_FILES}\n", "```\n", diff --git a/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi-ETL.ipynb b/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi-ETL.ipynb index 18c9b422d..f71d71893 100644 --- a/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi-ETL.ipynb +++ b/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi-ETL.ipynb @@ -19,15 +19,15 @@ "All data could be found at https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page\n", "\n", "### 2. 
Download needed jars\n", - "* [cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/)\n", - "* [rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar)\n", + "* [cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/)\n", + "* [rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar)\n", "\n", "### 3. Start Spark Standalone\n", "Before running the script, please setup Spark standalone mode\n", "\n", "### 4. Add ENV\n", "```\n", - "$ export SPARK_JARS=cudf-22.02.0-cuda11.jar,rapids-4-spark_2.12-22.02.0.jar\n", + "$ export SPARK_JARS=cudf-22.04.0-cuda11.jar,rapids-4-spark_2.12-22.04.0.jar\n", "\n", "```\n", "\n", diff --git a/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb b/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb index 635814773..a9e5a12d3 100644 --- a/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb +++ b/examples/Spark-ETL+XGBoost/taxi/notebooks/scala/taxi_gpu_crossvalidation.ipynb @@ -150,10 +150,10 @@ "```scala\n", "import org.apache.spark.sql.SparkSession\n", "val spark = SparkSession.builder().appName(\"Taxi-GPU\").getOrCreate\n", - "%AddJar file:/data/libs/cudf-22.02.0-cuda11.jar\n", - "%AddJar file:/data/libs/xgboost4j_3.0-1.4.2-0.2.0.jar\n", - "%AddJar file:/data/libs/xgboost4j-spark_3.0-1.4.2-0.2.0.jar\n", - "%AddJar file:/data/libs/rapids-4-spark-22.02.0.jar\n", + "%AddJar file:/data/libs/cudf-22.04.0-cuda11.jar\n", + "%AddJar file:/data/libs/xgboost4j_3.0-1.4.2-0.3.0.jar\n", + "%AddJar file:/data/libs/xgboost4j-spark_3.0-1.4.2-0.3.0.jar\n", + "%AddJar file:/data/libs/rapids-4-spark_2.12-22.04.0.jar\n", "// ...\n", "```" ] diff --git a/examples/Spark-cuML/pca/README.md b/examples/Spark-cuML/pca/README.md index 3ecb297d3..feca6fecb 100644 --- 
a/examples/Spark-cuML/pca/README.md +++ b/examples/Spark-cuML/pca/README.md @@ -12,14 +12,10 @@ User can also download the release jar from Maven central: [rapids-4-spark-ml_2.12-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark-ml_2.12/22.02.0/rapids-4-spark-ml_2.12-22.02.0-cuda11.jar) -[cudf-22.02.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) +[cudf-22.04.0-cuda11.jar](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar) -[rapids-4-spark_2.12-22.02.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) +[rapids-4-spark_2.12-22.04.0.jar](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar) -Note: before preceeding to the running section, make sure you have conda with cuDF installed by -``` -conda install -c rapidsai-nightly -c nvidia -c conda-forge cudf=21.12 python=3.8 -y -``` ## Sample code @@ -54,8 +50,8 @@ It is assumed that a Standalone Spark cluster has been set up, the `SPARK_MASTER ``` bash RAPIDS_ML_JAR=PATH_TO_rapids-4-spark-ml_2.12-22.02.0-cuda11.jar - CUDF_JAR=PATH_TO_cudf-22.02.0-cuda11.jar - PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-22.02.0.jar + CUDF_JAR=PATH_TO_cudf-22.04.0-cuda11.jar + PLUGIN_JAR=PATH_TO_rapids-4-spark_2.12-22.04.0.jar jupyter toree install \ --spark_home=${SPARK_HOME} \ diff --git a/examples/Spark-cuSpatial/README.md b/examples/Spark-cuSpatial/README.md index 17b1d157a..df6ec0cb5 100644 --- a/examples/Spark-cuSpatial/README.md +++ b/examples/Spark-cuSpatial/README.md @@ -39,7 +39,7 @@ or you can build it [in local](#build-in-local) machine after some prerequisites 4. 
[cuspatial](https://github.com/rapidsai/cuspatial): install libcuspatial ```Bash # get libcuspatial from conda - conda install -c rapidsai -c nvidia -c conda-forge -c defaults libcuspatial=22.02 + conda install -c rapidsai -c nvidia -c conda-forge -c defaults libcuspatial=22.04 # or below command for the nightly (aka SNAPSHOT) version. conda install -c rapidsai-nightly -c nvidia -c conda-forge -c defaults libcuspatial=22.04 ``` @@ -62,8 +62,8 @@ or you can build it [in local](#build-in-local) machine after some prerequisites 2. Set up a standalone cluster of Spark. Make sure the conda/lib is included in LD_LIBRARY_PATH, so that spark executors can load libcuspatial.so. 3. Download cuDF & spark-rapids jars - * [cuDF v22.02.0](https://repo1.maven.org/maven2/ai/rapids/cudf/22.02.0/cudf-22.02.0-cuda11.jar) or above - * [spark-rapids v22.02.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.02.0/rapids-4-spark_2.12-22.02.0.jar) or above + * [cuDF v22.04.0](https://repo1.maven.org/maven2/ai/rapids/cudf/22.04.0/cudf-22.04.0-cuda11.jar) or above + * [spark-rapids v22.04.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/22.04.0/rapids-4-spark_2.12-22.04.0.jar) or above 4. Prepare the dataset & jars. Copy the sample dataset from [cuspatial_data](../../datasets/cuspatial_data.tar.gz) to "/data/cuspatial_data". Copy cuDF, spark-rapids & spark-cuspatial-22.04-SNAPSHOT.jar to "/data/cuspatial_data/jars". You can use your own path, but remember to update the paths in "gpu-run.sh" accordingly. 
diff --git a/examples/Spark-cuSpatial/gpu-run.sh b/examples/Spark-cuSpatial/gpu-run.sh index 642603751..99760f2d0 100755 --- a/examples/Spark-cuSpatial/gpu-run.sh +++ b/examples/Spark-cuSpatial/gpu-run.sh @@ -31,7 +31,7 @@ rm -rf $DATA_OUT_PATH # the path to keep the jars of cudf, spark-rapids & spark-cuspatial JARS=$ROOT_PATH/jars -JARS_PATH=$JARS/cudf-22.02.0-cuda11.jar,$JARS/rapids-4-spark_2.12-22.02.0.jar,$JARS/spark-cuspatial-22.04-SNAPSHOT.jar +JARS_PATH=$JARS/cudf-22.04.0-cuda11.jar,$JARS/rapids-4-spark_2.12-22.04.0.jar,$JARS/spark-cuspatial-22.04-SNAPSHOT.jar $SPARK_HOME/bin/spark-submit --master spark://$HOSTNAME:7077 \ --name "Gpu Spatial Join UDF" \ diff --git a/examples/Spark-cuSpatial/notebooks/spatial_join_standalone.ipynb b/examples/Spark-cuSpatial/notebooks/spatial_join_standalone.ipynb index 09d3bb7ac..91d3a7c2a 100644 --- a/examples/Spark-cuSpatial/notebooks/spatial_join_standalone.ipynb +++ b/examples/Spark-cuSpatial/notebooks/spatial_join_standalone.ipynb @@ -9,7 +9,7 @@ "source": [ "from pyspark.sql import SparkSession\n", "spark = SparkSession.builder \\\n", - " .config(\"spark.jars\", \"/data/cuspatial_data/jars/cudf-22.02.0-cuda11.jar,/data/cuspatial_data/jars/rapids-4-spark_2.12-22.02.0.jar,/data/cuspatial_data/jars/spark-cuspatial-22.04-SNAPSHOT.jar\") \\\n", + " .config(\"spark.jars\", \"/data/cuspatial_data/jars/cudf-22.04.0-cuda11.jar,/data/cuspatial_data/jars/rapids-4-spark_2.12-22.04.0.jar,/data/cuspatial_data/jars/spark-cuspatial-22.04-SNAPSHOT.jar\") \\\n", " .config(\"spark.sql.adaptive.enabled\", \"false\") \\\n", " .config(\"spark.executor.memory\", \"20GB\") \\\n", " .config(\"spark.executor.cores\", \"6\") \\\n", diff --git a/examples/Spark-cuSpatial/pom.xml b/examples/Spark-cuSpatial/pom.xml index fc31ad53f..51992fb3b 100644 --- a/examples/Spark-cuSpatial/pom.xml +++ b/examples/Spark-cuSpatial/pom.xml @@ -30,7 +30,7 @@ 1.8 1.8 8 - 22.02.0 + 22.04.0 2.12 3.2.0 ${project.build.directory}/cpp-build diff --git 
a/examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb b/examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb index 7af48908a..6c41566a9 100644 --- a/examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb +++ b/examples/micro-benchmarks/notebooks/micro-benchmarks-gpu.ipynb @@ -22,8 +22,8 @@ "import os\n", "# Change to your cluster ip:port and directories\n", "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n", - "CUDF_JAR = os.getenv(\"CUDF_JAR\", \"/your-path/cudf-22.02.0-cuda11.jar\")\n", - "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-22.02.0.jar\")\n" + "CUDF_JAR = os.getenv(\"CUDF_JAR\", \"/your-path/cudf-22.04.0-cuda11.jar\")\n", + "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-22.04.0.jar\")\n" ] }, {