diff --git a/dev/spark-test-image-util/docs/build-docs b/dev/spark-test-image-util/docs/build-docs
new file mode 100644
index 0000000000000..6ff9c7cd9455c
--- /dev/null
+++ b/dev/spark-test-image-util/docs/build-docs
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+if ! [ -x "$(command -v docker)" ]; then
+  echo "Error: Docker is not installed." >&2
+  exit 1
+fi
+
+DOCKER_CACHE_IMG="ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:master"
+REPO_OWNER="apache/spark"
+REPOSITORY="apache-spark-ci-image-docs"
+IMG_TAG=$(date +%s)
+IMG_NAME="${REPOSITORY}:${IMG_TAG}"
+IMG_URL="$REPO_OWNER/$IMG_NAME"
+DOCKER_MOUNT_SPARK_HOME="/__w/spark/spark"
+BUILD_DOCS_SCRIPT_PATH="${DOCKER_MOUNT_SPARK_HOME}/dev/spark-test-image-util/docs/run-in-container"
+
+FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
+SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/../../..; pwd)"
+
+# 1.Compile spark outside the container to prepare for generating documents inside the container.
+"${SPARK_HOME}/build/sbt" -Phive -Pkinesis-asl clean unidoc package
+
+# 2.Build container image.
+docker buildx build \
+  --cache-from type=registry,ref="${DOCKER_CACHE_IMG}" \
+  --tag "${IMG_URL}" "${FWDIR}" \
+  --file "${SPARK_HOME}/dev/spark-test-image/docs/Dockerfile"
+
+# 3.Build docs on container: `error docs`, `scala doc`, `python doc`, `sql doc`.
+docker run \
+  --mount type=bind,source="${SPARK_HOME}",target="${DOCKER_MOUNT_SPARK_HOME}" \
+  --interactive --tty "${IMG_URL}" \
+  /bin/bash -c "sh ${BUILD_DOCS_SCRIPT_PATH}"
+
+# 4.Build docs on host: `r doc`.
+#
+# Why must the `r` doc be built outside the container?
+# Because when it is built inside the container, the directory
+# `/__w/spark/spark/R/pkg/docs` automatically generated by `RScript` gets the
+# permissions `dr-xr--r-x`, so subsequent writes into it fail with an error such as:
+# `! [EACCES] Failed to copy '/usr/local/lib/R/site-library/pkgdown/BS5/assets/katex-auto.js'
+# to '/__w/spark/spark/R/pkg/docs/katex-auto.js': permission denied`
+export SKIP_ERRORDOC=1
+export SKIP_SCALADOC=1
+export SKIP_PYTHONDOC=1
+export SKIP_SQLDOC=1
+cd "${SPARK_HOME}/docs"
+bundle exec jekyll build
+
+# 5.Remove container image.
+IMG_ID=$(docker images --filter "reference=${IMG_URL}" --quiet)
+docker image rm --force "${IMG_ID}"
+
+echo "Build doc done."
diff --git a/dev/spark-test-image-util/docs/run-in-container b/dev/spark-test-image-util/docs/run-in-container
new file mode 100644
index 0000000000000..1d43c602f7c72
--- /dev/null
+++ b/dev/spark-test-image-util/docs/run-in-container
@@ -0,0 +1,37 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# 1.Set env variable. Derive the JDK path from the container's architecture
+export JAVA_HOME="/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)"
+export PATH=$JAVA_HOME/bin:$PATH
+export SPARK_DOCS_IS_BUILT_ON_HOST=1
+# We expect to compile the R document on the host.
+export SKIP_RDOC=1
+
+# 2.Install bundler.
+gem install bundler -v 2.4.22
+cd /__w/spark/spark/docs
+bundle install
+
+# 3.Build docs, includes: `error docs`, `scala doc`, `python doc`, `sql doc`, excludes: `r doc`.
+# We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages.
+ln -s "$(which python3.9)" "/usr/local/bin/python3"
+
+# With SKIP_RDOC=1 exported above, Jekyll builds every doc type except the
+# R doc, which is built on the host afterwards.
+cd /__w/spark/spark/docs
+bundle exec jekyll build
diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb
index 79aad9695a3c7..e2ddcca6cdde5 100644
--- a/docs/_plugins/build_api_docs.rb
+++ b/docs/_plugins/build_api_docs.rb
@@ -34,6 +34,11 @@ def print_header(text)
 end
 
 def build_spark_if_necessary
+  # If Spark has already been compiled on the host, skip here.
+  if ENV['SPARK_DOCS_IS_BUILT_ON_HOST'] == '1'
+    return
+  end
+
   if $spark_package_is_built
     return
   end
@@ -116,6 +121,16 @@ def copy_and_update_java_docs(source, dest, scala_source)
   File.open(css_file, 'a') { |f| f.write("\n" + css.join()) }
 end
 
+def build_spark_scala_and_java_docs_if_necessary
+  # If Spark's docs have already been compiled on the host, skip here.
+  if ENV['SPARK_DOCS_IS_BUILT_ON_HOST'] == '1'
+    return
+  end
+
+  command = "build/sbt -Pkinesis-asl unidoc"
+  puts "Running '#{command}'..."
+  system(command) || raise("Unidoc generation failed")
+end
 
 def build_scala_and_java_docs
   build_spark_if_necessary
@@ -123,9 +138,7 @@ def build_scala_and_java_docs
   print_header "Building Scala and Java API docs."
   cd(SPARK_PROJECT_ROOT)
 
-  command = "build/sbt -Pkinesis-asl unidoc"
-  puts "Running '#{command}'..."
-  system(command) || raise("Unidoc generation failed")
+  build_spark_scala_and_java_docs_if_necessary
 
   puts "Moving back into docs dir."
   cd("docs")