From e6349915127e2fb3954594f02543523f4dc46aae Mon Sep 17 00:00:00 2001 From: Evan Bonsignori Date: Fri, 13 Dec 2024 13:07:42 -0800 Subject: [PATCH] pass auth to Docker & clone needed repos at build time (#53618) --- .github/workflows/moda-ci.yaml | 45 ++++- Dockerfile | 176 +++++++++++------- src/deployments/README.md | 6 + src/deployments/production/README.md | 31 +++ .../build-scripts/clone-or-use-cached-repo.sh | 37 ++++ .../production/build-scripts/fetch-repos.sh | 52 ++++++ .../build-scripts/merge-early-access.sh | 8 + 7 files changed, 288 insertions(+), 67 deletions(-) create mode 100644 src/deployments/README.md create mode 100644 src/deployments/production/README.md create mode 100644 src/deployments/production/build-scripts/clone-or-use-cached-repo.sh create mode 100644 src/deployments/production/build-scripts/fetch-repos.sh create mode 100755 src/deployments/production/build-scripts/merge-early-access.sh diff --git a/.github/workflows/moda-ci.yaml b/.github/workflows/moda-ci.yaml index 0545be426c12..728104707fe5 100644 --- a/.github/workflows/moda-ci.yaml +++ b/.github/workflows/moda-ci.yaml @@ -12,9 +12,36 @@ on: types: [checks_requested] jobs: + ########################## + # Generate Vault keys + ########################## + set-vault-keys: + runs-on: ubuntu-latest + outputs: + modified_vault_keys: ${{ steps.modify_vault_keys.outputs.modified }} + steps: + - name: Set vault-keys output + id: modify_vault_keys + # Pass the variable through env instead of expanding it inside the script, so its value is handled as data rather than shell syntax + env: + VAULT_KEYS: ${{ vars.VAULT_KEYS }} + run: | + if [ -z "$VAULT_KEYS" ]; then + # We want to add the DOCS_BOT_PAT_READPUBLICKEY to the list of keys + # so that builds fetch the secret from the docs-internal vault + # where --environment is "ci" + echo "modified=DOCS_BOT_PAT_READPUBLICKEY" >> "$GITHUB_OUTPUT" + else + echo "modified=${VAULT_KEYS},DOCS_BOT_PAT_READPUBLICKEY" >> "$GITHUB_OUTPUT" + fi + + ############# + # Moda jobs + ############# moda-config-bundle: if: ${{ github.repository == 'github/docs-internal' }} name: ${{ matrix.ci_job.job }} + needs:
set-vault-keys strategy: fail-fast: false matrix: @@ -22,14 +49,18 @@ jobs: uses: github/internal-actions/.github/workflows/moda.yml@main with: ci-formatted-job-name: ${{ matrix.ci_job.job }} - vault-keys: ${{ vars.VAULT_KEYS }} + vault-keys: ${{ needs.set-vault-keys.outputs.modified_vault_keys }} secrets: dx-bot-token: ${{ secrets.INTERNAL_ACTIONS_DX_BOT_ACCOUNT_TOKEN }} datadog-api-key: ${{ secrets.DATADOG_API_KEY }} + ############# + # Docker Image jobs + ############# docker-image: if: ${{ github.repository == 'github/docs-internal' }} name: ${{ matrix.ci_job.job }} + needs: set-vault-keys strategy: fail-fast: false matrix: @@ -37,14 +68,20 @@ jobs: uses: github/internal-actions/.github/workflows/kube.yml@main with: ci-formatted-job-name: ${{ matrix.ci_job.job }} - vault-keys: ${{ vars.VAULT_KEYS }} + vault-keys: ${{ needs.set-vault-keys.outputs.modified_vault_keys }} + # Passes 'DOCS_BOT_PAT_READPUBLICKEY' secret from Vault to docker as --secret id=DOCS_BOT_PAT_READPUBLICKEY,src=<path-to-secret-file> + docker-build-env-secrets: 'DOCS_BOT_PAT_READPUBLICKEY' secrets: dx-bot-token: ${{ secrets.INTERNAL_ACTIONS_DX_BOT_ACCOUNT_TOKEN }} datadog-api-key: ${{ secrets.DATADOG_API_KEY }} + ############# + # Docker Security jobs + ############# docker-security: if: ${{ github.repository == 'github/docs-internal' }} name: ${{ matrix.ci_job.job }} + needs: set-vault-keys strategy: fail-fast: false matrix: @@ -52,7 +89,9 @@ jobs: uses: github/internal-actions/.github/workflows/docker_security.yml@main with: ci-formatted-job-name: ${{ matrix.ci_job.job }} - vault-keys: ${{ vars.VAULT_KEYS }} + vault-keys: ${{ needs.set-vault-keys.outputs.modified_vault_keys }} + # Passes 'DOCS_BOT_PAT_READPUBLICKEY' secret from Vault to docker as --secret id=DOCS_BOT_PAT_READPUBLICKEY,src=<path-to-secret-file> + docker-build-env-secrets: 'DOCS_BOT_PAT_READPUBLICKEY' secrets: dx-bot-token: ${{ secrets.INTERNAL_ACTIONS_DX_BOT_ACCOUNT_TOKEN }} datadog-api-key: ${{ secrets.DATADOG_API_KEY }} diff --git a/Dockerfile b/Dockerfile index
dd9033bb3752..8f9101ffe6a0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,6 @@ -# This Dockerfile is used for docker-based deployments to Azure for both preview environments and production +# This Dockerfile is used solely for production deployments to Moda +# For staging deployments, see src/deployments/staging/Dockerfile +# For building this file locally, see src/deployments/production/README.md # -------------------------------------------------------------------------------- # BASE IMAGE @@ -9,104 +11,149 @@ FROM node:22-alpine@sha256:c13b26e7e602ef2f1074aef304ce6e9b7dd284c419b35d89fcf3c # This directory is owned by the node user ARG APP_HOME=/home/node/app - -# Make sure we don't run anything as the root user -USER node - +RUN mkdir -p $APP_HOME && chown -R node:node $APP_HOME WORKDIR $APP_HOME +# Switch to root to ensure we have permissions to copy, chmod, and install +USER root -# --------------- -# ALL DEPS -# --------------- -FROM base AS all_deps +# Install git for cloning docs-early-access & translations repos +# Install curl for determining the early access branch +RUN apk add --no-cache git curl -COPY --chown=node:node package.json package-lock.json ./ +# Copy in build scripts +COPY src/deployments/production/build-scripts/*.sh ./build-scripts/ -RUN npm ci --no-optional --registry https://registry.npmjs.org/ +# Make scripts executable +RUN chmod +x build-scripts/*.sh -# For Next.js v12+ -# This the appropriate necessary extra for node:VERSION-alpine -# Other options are https://www.npmjs.com/search?q=%40next%2Fswc -RUN npm i @next/swc-linux-x64-musl --no-save || npm i @next/swc-linux-arm64-musl --no-save +# We need to copy over content that will be merged with early-access +COPY content ./content +COPY assets ./assets +COPY data ./data +# Use the mounted --secret to: +# - 1. Fetch the docs-internal repo +# - 2. Fetch the docs-early-access repo & override docs-internal with early access content +# - 3.
Fetch each translations repo to the repo/translations directory +# We use --mount=type=secret to avoid the secret being copied into the image layers for security +# The secret passed via --secret can only be used in this RUN command +# NOTE(review): echoing $(date) below does not change the RUN instruction text, which is what Docker's layer cache is keyed on; confirm CI builds without cache (or pass a changing --build-arg) if a fresh clone is required on every build +RUN --mount=type=secret,id=DOCS_BOT_PAT_READPUBLICKEY \ + # We don't cache because Docker can't know if we need to fetch new content from remote repos + echo "Don't cache this step by printing date: $(date)" && \ + . ./build-scripts/fetch-repos.sh + +# Give node user access to the copied content since we cloned as root +RUN chown -R node:node $APP_HOME/content +RUN chown -R node:node $APP_HOME/assets +RUN chown -R node:node $APP_HOME/data +# Give node user access to translations repos +RUN chown -R node:node $APP_HOME/translations + +# Change back to node to make sure we don't run anything as the root user +USER node # --------------- -# PROD DEPS +# ALL DEPS Image # --------------- -FROM all_deps AS prod_deps +FROM base AS all_deps -RUN npm prune --production +ARG APP_HOME=/home/node/app +USER node +WORKDIR $APP_HOME + +# Copy what is needed to run npm ci +COPY --chown=node:node package.json package-lock.json ./ +RUN npm ci --no-optional --registry https://registry.npmjs.org/ # --------------- -# BUILDER +# BUILDER Image # --------------- FROM all_deps AS builder -COPY src ./src -# The star is because it's an optional directory -COPY .remotejson-cache* ./.remotejson-cache -# The star is because it's an optional file -COPY .pageinfo-cache.json.br* ./.pageinfo-cache.json.br -# Certain content is necessary for being able to build -COPY content/index.md ./content/index.md -COPY content/rest ./content/rest -COPY data ./data +ARG APP_HOME=/home/node/app +USER node +WORKDIR $APP_HOME -COPY next.config.js ./next.config.js -COPY tsconfig.json ./tsconfig.json +# Copy what is needed to: +# 1. Build the app +# 2. run warmup-remotejson script +# 3.
run precompute-pageinfo script +# Dependencies +COPY --chown=node:node --from=all_deps $APP_HOME/node_modules $APP_HOME/node_modules +# Content with merged early-access content +COPY --chown=node:node --from=base $APP_HOME/data ./data +COPY --chown=node:node --from=base $APP_HOME/assets ./assets +COPY --chown=node:node --from=base $APP_HOME/content ./content +# Source code +COPY --chown=node:node --from=all_deps $APP_HOME/package.json ./ +COPY src ./src +COPY next.config.js ./ +COPY tsconfig.json ./ +# 1. Build RUN npm run build +# 2. Warm up the remotejson cache +RUN npm run warmup-remotejson + +# 3. Precompute the pageinfo cache +RUN npm run precompute-pageinfo -- --max-versions 2 + +# Prune deps for prod image +RUN npm prune --production + # -------------------------------------------------------------------------------- -# PREVIEW IMAGE - no translations +# PRODUCTION IMAGE # -------------------------------------------------------------------------------- +FROM base AS production -FROM base AS preview +ARG APP_HOME=/home/node/app +USER node +WORKDIR $APP_HOME -# Copy just prod dependencies -COPY --chown=node:node --from=prod_deps $APP_HOME/node_modules $APP_HOME/node_modules +# Copy the content with merged early-access content +COPY --chown=node:node --from=base $APP_HOME/data ./data +COPY --chown=node:node --from=base $APP_HOME/assets ./assets +COPY --chown=node:node --from=base $APP_HOME/content ./content -# Copy our front-end code -COPY --chown=node:node --from=builder $APP_HOME/.next $APP_HOME/.next +# Include cloned translations +COPY --chown=node:node --from=base $APP_HOME/translations ./translations -# We should always be running in production mode -ENV NODE_ENV=production +# Copy prod dependencies +COPY --chown=node:node --from=builder $APP_HOME/package.json ./ +COPY --chown=node:node --from=builder $APP_HOME/node_modules $APP_HOME/node_modules -# Preferred port for server.js -ENV PORT=4000 +# Copy built artifacts needed at runtime for the server +COPY
--chown=node:node --from=builder $APP_HOME/.next $APP_HOME/.next -ENV ENABLED_LANGUAGES="en" +# Copy cache files generated during build scripts +COPY --chown=node:node --from=builder $APP_HOME/.remotejson-cache ./.remotejson-cache +COPY --chown=node:node --from=builder $APP_HOME/.pageinfo-cache.json.br* ./.pageinfo-cache.json.br +# Copy only what's needed to run the server +COPY --chown=node:node --from=builder $APP_HOME/src ./src +COPY --chown=node:node --from=builder $APP_HOME/next.config.js ./ +COPY --chown=node:node --from=builder $APP_HOME/tsconfig.json ./ + +# - - - +# Environment variables +# - - - # This makes it possible to set `--build-arg BUILD_SHA=abc123` # and it then becomes available as an environment variable in the docker run. ARG BUILD_SHA ENV BUILD_SHA=$BUILD_SHA - -# Copy only what's needed to run the server -COPY --chown=node:node package.json ./ -COPY --chown=node:node assets ./assets -COPY --chown=node:node content ./content -COPY --chown=node:node src ./src -COPY --chown=node:node .remotejson-cache* ./.remotejson-cache -COPY --chown=node:node .pageinfo-cache.json.br* ./.pageinfo-cache.json.br -COPY --chown=node:node data ./data -COPY --chown=node:node next.config.js ./ -COPY --chown=node:node tsconfig.json ./ +# We should always be running in production mode +ENV NODE_ENV=production +# Preferred port for server.js +ENV PORT=4000 +# Include all languages +ENV ENABLED_LANGUAGES="en,zh,es,pt,ru,ja,fr,de,ko" EXPOSE $PORT +# Entrypoint to start the server +# Note: Currently we have to use tsx because we have a mix of `.ts` and `.js` files with multiple import patterns CMD ["node_modules/.bin/tsx", "src/frame/server.ts"] - -#
-------------------------------------------------------------------------------- -FROM preview AS production - -# Override what was set for previews -# Make this match the default of `Object.keys(languages)` in src/languages/lib/languages.js -ENV ENABLED_LANGUAGES "en,zh,es,pt,ru,ja,fr,de,ko" - -# Copy in all translations -COPY --chown=node:node translations ./translations diff --git a/src/deployments/README.md b/src/deployments/README.md new file mode 100644 index 000000000000..d3564bbcf966 --- /dev/null +++ b/src/deployments/README.md @@ -0,0 +1,6 @@ +# Deployments + +Documentation and build files for our deployments. + +- For production deploys: [src/deployments/production](./production/) +- For staging deploys (includes review servers): [src/deployments/staging](./staging/) diff --git a/src/deployments/production/README.md b/src/deployments/production/README.md new file mode 100644 index 000000000000..12892b0dde88 --- /dev/null +++ b/src/deployments/production/README.md @@ -0,0 +1,31 @@ +# Production deploys + +For internal docs relating to our production deploys, see [TODO Docs-Engineering URL] + +## Auto deploys + +Pushing to `main` on `docs-internal` will automatically kick off a deploy to production. + +The status of deployments is posted in the `#docs-ops` Slack channel. + +## Building & running the production image locally + +Build the production Docker image locally: + +```bash +docker build -t docs:latest . --secret id=DOCS_BOT_PAT_READPUBLICKEY,src=<(echo "<YOUR_GITHUB_PAT>") +``` + +Where `<YOUR_GITHUB_PAT>` must be a PAT with `contents: read` access to: + +1. `docs-internal.<language-code>` for every `<language-code>` translation repo +2. `docs-early-access` + +Run the built image: + +```bash +docker run -p 4000:4000 docs:latest +``` + +> [!NOTE] +> We require `DOCKER_BUILDKIT=1` to support passing `--secret` to the Dockerfile which allows us to clone private repos at build time.
This is done in Moda via the `docker-build-env-secrets` argument in the [.github/workflows/moda-ci.yaml](../../../.github/workflows/moda-ci.yaml) workflow. \ No newline at end of file diff --git a/src/deployments/production/build-scripts/clone-or-use-cached-repo.sh b/src/deployments/production/build-scripts/clone-or-use-cached-repo.sh new file mode 100644 index 000000000000..b7742c3d5ea2 --- /dev/null +++ b/src/deployments/production/build-scripts/clone-or-use-cached-repo.sh @@ -0,0 +1,46 @@ +set -e + +# We use this function to use the cached version of the repo if it exists from +# a previous Dockerfile build. Otherwise, we clone the repo and check out the +# specified branch/SHA. +# The token in $GITHUB_TOKEN is only passed to git for the network call itself; +# the stored `origin` URL is always reset to a credential-free one so the token +# is never written to the clone's .git/config. +# Arguments: +# $1 - Repository name (for directory naming) +# $2 - Repository name under the `github` org on GitHub (used to build the clone URL) +# $3 - Branch to clone +clone_or_use_cached_repo() { + repo_name="$1" + repo_url="$2" + branch="$3" + # auth_url carries the token and is used for fetch/pull/clone; public_url is what gets persisted as origin + auth_url="https://${GITHUB_TOKEN}@github.com/github/$repo_url.git" + public_url="https://github.com/github/$repo_url.git" + + echo "Processing repository '$repo_name'..." + + if [ -d "$repo_name/.git" ]; then + echo "Repository '$repo_name' already exists. Fetching updates..." + cd "$repo_name" + + # Fetch latest changes + git fetch "$auth_url" "$branch" + git checkout "$branch" + git pull "$auth_url" "$branch" + + echo "Updated repository '$repo_name' with the latest changes from $branch." + + cd .. + else + echo "Cloning repository '$repo_name' from branch '$branch'..." + + # We only need the most recent change for production deploys, so we use --depth 1 + git clone --depth 1 --branch "$branch" "$auth_url" "$repo_name" + fi + + # git stores the clone URL verbatim, token included, so overwrite it before the directory is kept + git -C "$repo_name" remote set-url origin "$public_url" + + echo "Repository '$repo_name' is up to date."
+} \ No newline at end of file diff --git a/src/deployments/production/build-scripts/fetch-repos.sh b/src/deployments/production/build-scripts/fetch-repos.sh new file mode 100644 index 000000000000..42cd81164c2b --- /dev/null +++ b/src/deployments/production/build-scripts/fetch-repos.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env sh + +# +# This script is intended to be called from the production Dockerfile +# Though it isn't working with all of the files from docs-internal (it only COPYs what is needed), +# it is useful to think of these scripts running from the root of the docs-internal repo. +# + +# Fetches and resolves docs-internal, early-access, and translations repos +echo "Fetching and resolving early-access, and translations repos" + +# Exit immediately if a command exits with a non-zero status +set -e + +# Import the clone_or_use_cached_repo function +. ./build-scripts/clone-or-use-cached-repo.sh + +# Set the GITHUB_TOKEN environment variable from the mounted --secret passed to Docker build +GITHUB_TOKEN=$(cat /run/secrets/DOCS_BOT_PAT_READPUBLICKEY) + +# - - - - - - - - - - +# Early access +# - - - - - - - - - - +echo "Fetching early access..." +clone_or_use_cached_repo "docs-early-access" "docs-early-access" "main" +echo "Merging early access..." +. ./build-scripts/merge-early-access.sh + +# - - - - - - - - - - +# Clone the translations repos +# - - - - - - - - - - +# Make sure to clone each translation repo into the `translations` directory inside the root of docs-internal (the Dockerfile's WORKDIR) +mkdir -p translations +cd translations + +# Iterate over each language +echo "Fetching translations..." +for lang in "zh-cn" "es-es" "pt-br" "ru-ru" "ja-jp" "fr-fr" "de-de" "ko-kr" +do + translations_repo="docs-internal.$lang" + clone_or_use_cached_repo "$lang" "$translations_repo" "main" +done +echo "Done fetching translations." + +# Go back to the root of the docs-internal repo +cd ..
+ +# - - - - - - - - - - +# Cleanup +# - - - - - - - - - - +# Delete GITHUB_TOKEN from the environment +unset GITHUB_TOKEN \ No newline at end of file diff --git a/src/deployments/production/build-scripts/merge-early-access.sh b/src/deployments/production/build-scripts/merge-early-access.sh new file mode 100755 index 000000000000..b7e711b1660c --- /dev/null +++ b/src/deployments/production/build-scripts/merge-early-access.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env sh + +# Takes docs-early-access files and merges them into docs-internal +# Assumed that it is being run from the root of the docs-internal repo + +mv docs-early-access/assets/images assets/images/early-access +mv docs-early-access/content content/early-access +mv docs-early-access/data data/early-access