From 59636c9b1ea7496c7cb79d83c7d5a79081135487 Mon Sep 17 00:00:00 2001 From: iliana etaoin <iliana@oxide.computer> Date: Tue, 14 May 2024 21:55:48 -0700 Subject: [PATCH] rearrange buildomat jobs; rewrite releng process in rust and aggressively parallelize (#5744) (Note: documentation says `cargo xtask releng` but I am going to wire that up in a follow-up PR; the current equivalent is `cargo run --release --bin omicron-releng`.) Prior to this change we had five main "release engineering" Buildomat jobs that do operations beyond running the test suite: - a **package** job which runs omicron-package in various configurations, - a **build OS images** job which builds the host and trampoline images, - a **TUF repo** job which builds the final TUF repo *(this is the build artifact we actually want)*, - a **deploy** job which uses the single-sled packages to test that a VM boots to SSH *(this is a test we actually want)*, - and a **CI tools** job which builds common tools used by multiple jobs. This looks like: ```mermaid graph LR package --> host-image["build OS images"] package --> deploy package --> tuf-repo["TUF repo"] host-image --> tuf-repo ci-tools["CI tools"] --> deploy ci-tools --> tuf-repo ``` (There are also the currently-disabled a4x2 jobs, but those are independent of this particular graph.) I think the initial idea behind this was to reuse build artifacts where possible, but this is pretty complicated and adds a lot more output upload/download overhead than expected, which slows down getting the end artifact we actually want. This PR changes the graph to: ```mermaid graph LR package --> deploy tuf-repo["TUF repo"] ``` And the **TUF repo** job primarily runs a new **releng** binary, which runs all of the steps required to download and build all the components of the TUF repo in a single task, using a terrible job runner I wrote. The primary goal here was to reduce the time from pushing a commit to getting a TUF repo out the other end; this drops time-to-TUF-repo from ~80 minutes to ~45. In the process, this also made it much easier to build a TUF repo (and iterate on that process) locally: just run `cargo xtask releng` (TODO: soon). It also deleted a lot of Bash. One thing to note is that, in service of the mission to get time-to-TUF-repo down as much as possible, that job _only_ uploads the TUF repo (and some logs). I also put all of the outputs for the **package** job into a single tarball for the **deploy** job to unpack. There are no longer separate uploads for the OS images and each zone; these can be extracted from the repo as we normally do.
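For the curious: the dependency mechanism in the job runner boils down to one oneshot channel per edge. The sketch below is a hypothetical, stripped-down distillation of `dev-tools/releng/src/job.rs` (it assumes `tokio`, `futures`, and `anyhow` as dependencies, and elides the concurrency-limiting semaphore, logging, and log-file plumbing the real module has):

```rust
// Each job waits on one receiver per dependency and signals one sender per
// dependent; Tokio's scheduler then walks the DAG for us.
use std::collections::HashMap;

use anyhow::{anyhow, Result};
use futures::future::{BoxFuture, FutureExt};
use futures::stream::{FuturesUnordered, TryStreamExt};
use tokio::sync::oneshot;

struct Job {
    future: BoxFuture<'static, Result<()>>,
    wait_for: Vec<oneshot::Receiver<()>>,
    notify: Vec<oneshot::Sender<()>>,
}

impl Job {
    async fn run(self) -> Result<()> {
        // A dropped sender means the dependency failed (or never ran).
        for receiver in self.wait_for {
            receiver.await.map_err(|_| anyhow!("dependency failed"))?;
        }
        self.future.await?;
        // A send error means the dependent is already gone; that's fine.
        for sender in self.notify {
            sender.send(()).ok();
        }
        Ok(())
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let mut jobs: HashMap<&str, Job> = HashMap::new();
    for name in ["package", "tuf-repo"] {
        jobs.insert(
            name,
            Job {
                future: async move {
                    println!("{name}");
                    anyhow::Ok(())
                }
                .boxed(),
                wait_for: Vec::new(),
                notify: Vec::new(),
            },
        );
    }
    // Declare "tuf-repo runs after package" as one channel.
    let (sender, receiver) = oneshot::channel();
    jobs.get_mut("package").unwrap().notify.push(sender);
    jobs.get_mut("tuf-repo").unwrap().wait_for.push(receiver);
    // Start everything at once; dependents park on their receivers.
    jobs.into_values()
        .map(Job::run)
        .collect::<FuturesUnordered<_>>()
        .try_collect::<()>()
        .await
}
```

Failure propagation falls out for free: when a job errors, its senders are dropped, and every downstream receiver resolves to an error instead of blocking forever.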
--- .github/buildomat/jobs/ci-tools.sh | 77 --- .github/buildomat/jobs/deploy.sh | 12 +- .github/buildomat/jobs/host-image.sh | 93 ---- .github/buildomat/jobs/package.sh | 115 +--- .github/buildomat/jobs/tuf-repo.sh | 138 ++--- Cargo.lock | 46 +- Cargo.toml | 5 +- caboose-util/Cargo.toml | 13 - caboose-util/src/main.rs | 32 -- dev-tools/releng/Cargo.toml | 35 ++ dev-tools/releng/src/cmd.rs | 167 ++++++ dev-tools/releng/src/hubris.rs | 148 +++++ dev-tools/releng/src/job.rs | 305 ++++++++++ dev-tools/releng/src/main.rs | 734 +++++++++++++++++++++++++ dev-tools/releng/src/tuf.rs | 149 +++++ dev-tools/xtask/Cargo.toml | 2 +- docs/releng.adoc | 81 +++ package-manifest.toml | 2 +- package/src/bin/omicron-package.rs | 25 +- package/src/lib.rs | 5 + tools/build-host-image.sh | 111 ---- tools/hubris_checksums | 8 - tools/hubris_version | 1 - tools/permslip_commit | 1 - tufaceous-lib/src/assemble/manifest.rs | 13 +- workspace-hack/Cargo.toml | 10 +- 26 files changed, 1754 insertions(+), 574 deletions(-) delete mode 100755 .github/buildomat/jobs/ci-tools.sh delete mode 100755 .github/buildomat/jobs/host-image.sh delete mode 100644 caboose-util/Cargo.toml delete mode 100644 caboose-util/src/main.rs create mode 100644 dev-tools/releng/Cargo.toml create mode 100644 dev-tools/releng/src/cmd.rs create mode 100644 dev-tools/releng/src/hubris.rs create mode 100644 dev-tools/releng/src/job.rs create mode 100644 dev-tools/releng/src/main.rs create mode 100644 dev-tools/releng/src/tuf.rs create mode 100644 docs/releng.adoc delete mode 100755 tools/build-host-image.sh delete mode 100644 tools/hubris_checksums delete mode 100644 tools/hubris_version delete mode 100644 tools/permslip_commit diff --git a/.github/buildomat/jobs/ci-tools.sh b/.github/buildomat/jobs/ci-tools.sh deleted file mode 100755 index 4c58731e24..0000000000 --- a/.github/buildomat/jobs/ci-tools.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/bin/bash -#: -#: name = "helios / CI tools" -#: variety = "basic" -#: target = "helios-2.0" -#: rust_toolchain = "1.72.1" -#: output_rules = [ -#: "=/work/end-to-end-tests/*.gz", -#: "=/work/caboose-util.gz", -#: "=/work/tufaceous.gz", -#: "=/work/commtest", -#: "=/work/permslip.gz", -#: ] -#: access_repos = [ -#: "oxidecomputer/permission-slip", -#: "oxidecomputer/sshauth" -#: ] - -set -o errexit -set -o pipefail -set -o xtrace - -cargo --version -rustc --version - -ptime -m ./tools/install_builder_prerequisites.sh -yp - -########## end-to-end-tests ########## - -banner end-to-end-tests - -# -# Reduce debuginfo just to line tables. 
-# -export CARGO_PROFILE_DEV_DEBUG=1 -export CARGO_PROFILE_TEST_DEBUG=1 -export CARGO_INCREMENTAL=0 - -ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \ - --message-format json-render-diagnostics >/tmp/output.end-to-end.json - -mkdir -p /work -ptime -m cargo build --locked -p end-to-end-tests --tests --bin commtest -cp target/debug/commtest /work/commtest - -mkdir -p /work/end-to-end-tests -for p in target/debug/bootstrap $(/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json); do - # shellcheck disable=SC2094 - ptime -m gzip < "$p" > /work/end-to-end-tests/"$(basename "$p").gz" -done - -########## caboose-util ########## - -banner caboose-util - -ptime -m cargo build --locked -p caboose-util --release -ptime -m gzip < target/release/caboose-util > /work/caboose-util.gz - -########## tufaceous ########## - -banner tufaceous - -ptime -m cargo build --locked -p tufaceous --release -ptime -m gzip < target/release/tufaceous > /work/tufaceous.gz - -########## permission-slip ########## - -banner permission-slip - -source "./tools/permslip_commit" -git init /work/permission-slip-build -pushd /work/permission-slip-build -git remote add origin https://github.com/oxidecomputer/permission-slip.git -ptime -m git fetch --depth 1 origin "$COMMIT" -git checkout FETCH_HEAD -ptime -m cargo build --locked -p permission-slip-client --release -ptime -m gzip < target/release/permslip > /work/permslip.gz diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 8d3e94cd5e..c947a05e10 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -20,8 +20,6 @@ #: [dependencies.package] #: job = "helios / package" #: -#: [dependencies.ci-tools] -#: job = "helios / CI tools" set -o errexit set -o pipefail @@ -144,13 +142,6 @@ pfexec chown build:build /opt/oxide/work cd /opt/oxide/work ptime -m tar xvzf /input/package/work/package.tar.gz -cp /input/package/work/zones/* out/ -mv out/nexus-single-sled.tar.gz out/nexus.tar.gz -mkdir tests -for p in /input/ci-tools/work/end-to-end-tests/*.gz; do - ptime -m gunzip < "$p" > "tests/$(basename "${p%.gz}")" - chmod a+x "tests/$(basename "${p%.gz}")" -done # Ask buildomat for the range of extra addresses that we're allowed to use, and # break them up into the ranges we need. 
@@ -354,7 +345,7 @@ echo "Waited for nexus: ${retry}s" export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END -eval "$(./tests/bootstrap)" +eval "$(./target/debug/bootstrap)" export OXIDE_HOST OXIDE_TOKEN # @@ -387,7 +378,6 @@ done /usr/oxide/oxide --resolve "$OXIDE_RESOLVE" --cacert "$E2E_TLS_CERT" \ image promote --project images --image debian11 -rm ./tests/bootstrap for test_bin in tests/*; do ./"$test_bin" done diff --git a/.github/buildomat/jobs/host-image.sh b/.github/buildomat/jobs/host-image.sh deleted file mode 100755 index 2f4d146a48..0000000000 --- a/.github/buildomat/jobs/host-image.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash -#: -#: name = "helios / build OS images" -#: variety = "basic" -#: target = "helios-2.0" -#: rust_toolchain = "1.72.1" -#: output_rules = [ -#: "=/work/helios/upload/os-host.tar.gz", -#: "=/work/helios/upload/os-trampoline.tar.gz", -#: ] -#: access_repos = [ -#: "oxidecomputer/amd-apcb", -#: "oxidecomputer/amd-efs", -#: "oxidecomputer/amd-firmware", -#: "oxidecomputer/amd-flash", -#: "oxidecomputer/amd-host-image-builder", -#: "oxidecomputer/boot-image-tools", -#: "oxidecomputer/chelsio-t6-roms", -#: "oxidecomputer/compliance-pilot", -#: "oxidecomputer/facade", -#: "oxidecomputer/helios", -#: "oxidecomputer/helios-omicron-brand", -#: "oxidecomputer/helios-omnios-build", -#: "oxidecomputer/helios-omnios-extra", -#: "oxidecomputer/nanobl-rs", -#: ] -#: -#: [dependencies.package] -#: job = "helios / package" -#: -#: [[publish]] -#: series = "image" -#: name = "os.tar.gz" -#: from_output = "/work/helios/image/output/os.tar.gz" -#: - -set -o errexit -set -o pipefail -set -o xtrace - -cargo --version -rustc --version - -TOP=$PWD - -source "$TOP/tools/include/force-git-over-https.sh" - -# Check out helios into /work/helios -HELIOSDIR=/work/helios -git clone https://github.com/oxidecomputer/helios.git "$HELIOSDIR" -cd "$HELIOSDIR" -# Record the branch and commit in the output -git status --branch --porcelain=2 -# Setting BUILD_OS to no makes setup skip repositories we don't need for -# building the OS itself (we are just building an image from already built OS). -BUILD_OS=no gmake setup - -# Commands that "helios-build" would ask us to run (either explicitly or -# implicitly, to avoid an error). -rc=0 -pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? -case $rc in - # `man pkg` notes that exit code 4 means no changes were made because - # there is nothing to do; that's fine. Any other exit code is an error. - 0 | 4) ;; - *) exit $rc ;; -esac - -pfexec zfs create -p "rpool/images/$USER" - - -# TODO: Consider importing zones here too? 
- -cd "$TOP" -OUTPUTDIR="$HELIOSDIR/upload" -mkdir "$OUTPUTDIR" - -banner OS -./tools/build-host-image.sh -B \ - -S /input/package/work/zones/switch-asic.tar.gz \ - "$HELIOSDIR" \ - /input/package/work/global-zone-packages.tar.gz - -mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-host.tar.gz" - -banner Trampoline - -./tools/build-host-image.sh -R \ - "$HELIOSDIR" \ - /input/package/work/trampoline-global-zone-packages.tar.gz - -mv "$HELIOSDIR/image/output/os.tar.gz" "$OUTPUTDIR/os-trampoline.tar.gz" - diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 11a5a1a0ee..63e5e1ce71 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -3,24 +3,11 @@ #: name = "helios / package" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.72.1" +#: rust_toolchain = "1.77.2" #: output_rules = [ -#: "=/work/version.txt", #: "=/work/package.tar.gz", -#: "=/work/global-zone-packages.tar.gz", -#: "=/work/trampoline-global-zone-packages.tar.gz", -#: "=/work/zones/*.tar.gz", #: ] #: -#: [[publish]] -#: series = "image" -#: name = "global-zone-packages" -#: from_output = "/work/global-zone-packages.tar.gz" -#: -#: [[publish]] -#: series = "image" -#: name = "trampoline-global-zone-packages" -#: from_output = "/work/trampoline-global-zone-packages.tar.gz" set -o errexit set -o pipefail @@ -32,17 +19,6 @@ rustc --version WORK=/work pfexec mkdir -p $WORK && pfexec chown $USER $WORK -# -# Generate the version for control plane artifacts here. We use `0.git` as the -# prerelease field because it comes before `alpha`. -# -# In this job, we stamp the version into packages installed in the host and -# trampoline global zone images. -# -COMMIT=$(git rev-parse HEAD) -VERSION="8.0.0-0.ci+git${COMMIT:0:11}" -echo "$VERSION" >/work/version.txt - ptime -m ./tools/install_builder_prerequisites.sh -yp ptime -m ./tools/ci_download_softnpu_machinery @@ -52,88 +28,33 @@ ptime -m cargo run --locked --release --bin omicron-package -- \ -t test target create -i standard -m non-gimlet -s softnpu -r single-sled ptime -m cargo run --locked --release --bin omicron-package -- \ -t test package +mapfile -t packages \ + < <(cargo run --locked --release --bin omicron-package -- -t test list-outputs) # Build the xtask binary used by the deploy job ptime -m cargo build --locked --release -p xtask -# Assemble some utilities into a tarball that can be used by deployment -# phases of buildomat. +# Build the end-to-end tests +# Reduce debuginfo just to line tables. +export CARGO_PROFILE_DEV_DEBUG=line-tables-only +export CARGO_PROFILE_TEST_DEBUG=line-tables-only +ptime -m cargo build --locked -p end-to-end-tests --tests --bin bootstrap \ + --message-format json-render-diagnostics >/tmp/output.end-to-end.json +mkdir tests +/opt/ooce/bin/jq -r 'select(.profile.test) | .executable' /tmp/output.end-to-end.json \ + | xargs -I {} -t cp {} tests/ + +# Assemble these outputs and some utilities into a tarball that can be used by +# deployment phases of buildomat. files=( - out/*.tar out/target/test out/npuzone/* package-manifest.toml smf/sled-agent/non-gimlet/config.toml target/release/omicron-package target/release/xtask + target/debug/bootstrap + tests/* ) - -ptime -m tar cvzf $WORK/package.tar.gz "${files[@]}" - -tarball_src_dir="$(pwd)/out/versioned" -stamp_packages() { - for package in "$@"; do - cargo run --locked --release --bin omicron-package -- stamp "$package" "$VERSION" - done -} - -# Keep the single-sled Nexus zone around for the deploy job. 
(The global zone -# build below overwrites the file.) -mv out/nexus.tar.gz out/nexus-single-sled.tar.gz - -# Build necessary for the global zone -ptime -m cargo run --locked --release --bin omicron-package -- \ - -t host target create -i standard -m gimlet -s asic -r multi-sled -ptime -m cargo run --locked --release --bin omicron-package -- \ - -t host package -stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay oxlog pumpkind-gz - -# Create global zone package @ $WORK/global-zone-packages.tar.gz -ptime -m ./tools/build-global-zone-packages.sh "$tarball_src_dir" $WORK - -# Non-Global Zones - -# Assemble Zone Images into their respective output locations. -# -# Zones that are included into another are intentionally omitted from this list -# (e.g., the switch zone tarballs contain several other zone tarballs: dendrite, -# mg-ddm, etc.). -# -# Note that when building for a real gimlet, `propolis-server` and `switch-*` -# should be included in the OS ramdisk. -mkdir -p $WORK/zones -zones=( - out/clickhouse.tar.gz - out/clickhouse_keeper.tar.gz - out/cockroachdb.tar.gz - out/crucible-pantry-zone.tar.gz - out/crucible-zone.tar.gz - out/external-dns.tar.gz - out/internal-dns.tar.gz - out/nexus.tar.gz - out/nexus-single-sled.tar.gz - out/oximeter.tar.gz - out/propolis-server.tar.gz - out/switch-*.tar.gz - out/ntp.tar.gz - out/omicron-gateway-softnpu.tar.gz - out/omicron-gateway-asic.tar.gz - out/overlay.tar.gz - out/probe.tar.gz -) -cp "${zones[@]}" $WORK/zones/ - -# -# Global Zone files for Trampoline image -# - -# Build necessary for the trampoline image -ptime -m cargo run --locked --release --bin omicron-package -- \ - -t recovery target create -i trampoline -ptime -m cargo run --locked --release --bin omicron-package -- \ - -t recovery package -stamp_packages installinator mg-ddm-gz - -# Create trampoline global zone package @ $WORK/trampoline-global-zone-packages.tar.gz -ptime -m ./tools/build-trampoline-global-zone-packages.sh "$tarball_src_dir" $WORK +ptime -m tar cvzf $WORK/package.tar.gz "${files[@]}" "${packages[@]}" diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 89928a0030..2ed1ae08c3 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -3,20 +3,29 @@ #: name = "helios / build TUF repo" #: variety = "basic" #: target = "helios-2.0" +#: rust_toolchain = "1.77.2" #: output_rules = [ -#: "=/work/manifest*.toml", -#: "=/work/repo-*.zip", -#: "=/work/repo-*.zip.sha256.txt", +#: "=/work/manifest.toml", +#: "=/work/repo.zip", +#: "=/work/repo.zip.sha256.txt", +#: "%/work/*.log", +#: ] +#: access_repos = [ +#: "oxidecomputer/amd-apcb", +#: "oxidecomputer/amd-efs", +#: "oxidecomputer/amd-firmware", +#: "oxidecomputer/amd-flash", +#: "oxidecomputer/amd-host-image-builder", +#: "oxidecomputer/boot-image-tools", +#: "oxidecomputer/chelsio-t6-roms", +#: "oxidecomputer/compliance-pilot", +#: "oxidecomputer/facade", +#: "oxidecomputer/helios", +#: "oxidecomputer/helios-omicron-brand", +#: "oxidecomputer/helios-omnios-build", +#: "oxidecomputer/helios-omnios-extra", +#: "oxidecomputer/nanobl-rs", #: ] -#: -#: [dependencies.ci-tools] -#: job = "helios / CI tools" -#: -#: [dependencies.package] -#: job = "helios / package" -#: -#: [dependencies.host] -#: job = "helios / build OS images" #: #: [[publish]] #: series = "rot-all" @@ -26,105 +35,34 @@ #: [[publish]] #: series = "rot-all" #: name = "repo.zip" -#: from_output = "/work/repo-rot-all.zip" +#: from_output = "/work/repo.zip" #: #: [[publish]] #: 
series = "rot-all" #: name = "repo.zip.sha256.txt" -#: from_output = "/work/repo-rot-all.zip.sha256.txt" +#: from_output = "/work/repo.zip.sha256.txt" #: set -o errexit set -o pipefail set -o xtrace -TOP=$PWD -VERSION=$(< /input/package/work/version.txt) - -for bin in caboose-util tufaceous permslip; do - ptime -m gunzip < /input/ci-tools/work/$bin.gz > /work/$bin - chmod a+x /work/$bin -done - -# -# We do two things here: -# 1. Run `omicron-package stamp` on all the zones. -# 2. Run `omicron-package unpack` to switch from "package-name.tar.gz" to "service_name.tar.gz". -# -mkdir /work/package -pushd /work/package -tar xf /input/package/work/package.tar.gz out package-manifest.toml target/release/omicron-package -target/release/omicron-package -t default target create -i standard -m gimlet -s asic -r multi-sled -ln -s /input/package/work/zones/* out/ -rm out/switch-softnpu.tar.gz # not used when target switch=asic -rm out/omicron-gateway-softnpu.tar.gz # not used when target switch=asic -rm out/nexus-single-sled.tar.gz # only used for deploy tests -for zone in out/*.tar.gz; do - target/release/omicron-package stamp "$(basename "${zone%.tar.gz}")" "$VERSION" -done -mv out/versioned/* out/ -OMICRON_NO_UNINSTALL=1 target/release/omicron-package unpack --out install -popd - -# Generate a throwaway repository key. -python3 -c 'import secrets; open("/work/key.txt", "w").write("ed25519:%s\n" % secrets.token_hex(32))' -read -r TUFACEOUS_KEY /work/manifest.toml <>/work/manifest.toml <>/work/manifest.toml <> /work/manifest.toml - done < $TOP/tools/permslip_$name - popd -} +rc=0 +pfexec pkg install -q /system/zones/brand/omicron1/tools || rc=$? +case $rc in + # `man pkg` notes that exit code 4 means no changes were made because + # there is nothing to do; that's fine. Any other exit code is an error. 
+ 0 | 4) ;; + *) exit $rc ;; +esac -mkdir /work/hubris -pushd /work/hubris -download_region_manifests https://permslip-staging.corp.oxide.computer staging -download_region_manifests https://signer-us-west.corp.oxide.computer production -popd +pfexec zfs create -p "rpool/images/$USER/host" +pfexec zfs create -p "rpool/images/$USER/recovery" -/work/tufaceous assemble --no-generate-key /work/manifest.toml /work/repo-rot-all.zip -digest -a sha256 /work/repo-rot-all.zip > /work/repo-rot-all.zip.sha256.txt +cargo run --release --bin omicron-releng -- --output-dir /work diff --git a/Cargo.lock b/Cargo.lock index e1e445cc3c..f7cded308b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -785,15 +785,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "caboose-util" -version = "0.1.0" -dependencies = [ - "anyhow", - "hubtools", - "omicron-workspace-hack", -] - [[package]] name = "camino" version = "1.1.6" @@ -2547,6 +2538,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" dependencies = [ "autocfg", + "tokio", ] [[package]] @@ -5592,6 +5584,37 @@ dependencies = [ "thiserror", ] +[[package]] +name = "omicron-releng" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "camino-tempfile", + "cargo_metadata", + "chrono", + "clap", + "fs-err", + "futures", + "hex", + "omicron-common", + "omicron-workspace-hack", + "omicron-zone-package", + "once_cell", + "reqwest", + "semver 1.0.22", + "serde", + "sha2", + "shell-words", + "slog", + "slog-async", + "slog-term", + "tar", + "tokio", + "toml 0.8.12", + "tufaceous-lib", +] + [[package]] name = "omicron-rpaths" version = "0.1.0" @@ -5778,6 +5801,7 @@ dependencies = [ "elliptic-curve", "ff", "flate2", + "fs-err", "futures", "futures-channel", "futures-core", @@ -5815,11 +5839,8 @@ dependencies = [ "pem-rfc7468", "petgraph", "postgres-types", - "ppv-lite86", "predicates", "proc-macro2", - "rand 0.8.5", - "rand_chacha 0.3.1", "regex", "regex-automata 0.4.5", "regex-syntax 0.8.2", @@ -5859,7 +5880,6 @@ dependencies = [ "yasna", "zerocopy 0.7.32", "zeroize", - "zip", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 83a41ff834..891565f857 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,6 @@ members = [ "api_identity", "bootstore", - "caboose-util", "certificates", "clients/bootstrap-agent-client", "clients/ddm-admin-client", @@ -21,6 +20,7 @@ members = [ "dev-tools/omicron-dev", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", + "dev-tools/releng", "dev-tools/xtask", "dns-server", "end-to-end-tests", @@ -84,7 +84,6 @@ members = [ default-members = [ "bootstore", - "caboose-util", "certificates", "clients/bootstrap-agent-client", "clients/ddm-admin-client", @@ -103,6 +102,7 @@ default-members = [ "dev-tools/omicron-dev", "dev-tools/oxlog", "dev-tools/reconfigurator-cli", + "dev-tools/releng", # Do not include xtask in the list of default members, because this causes # hakari to not work as well and build times to be longer. # See omicron#4392. 
@@ -228,6 +228,7 @@ bytes = "1.6.0" camino = { version = "1.1", features = ["serde1"] } camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" +cargo_metadata = "0.18.1" chacha20poly1305 = "0.10.1" ciborium = "0.2.2" cfg-if = "1.0" diff --git a/caboose-util/Cargo.toml b/caboose-util/Cargo.toml deleted file mode 100644 index ceff70b41d..0000000000 --- a/caboose-util/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "caboose-util" -version = "0.1.0" -edition = "2021" -license = "MPL-2.0" - -[lints] -workspace = true - -[dependencies] -anyhow.workspace = true -hubtools.workspace = true -omicron-workspace-hack.workspace = true diff --git a/caboose-util/src/main.rs b/caboose-util/src/main.rs deleted file mode 100644 index 36851cd36d..0000000000 --- a/caboose-util/src/main.rs +++ /dev/null @@ -1,32 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -use anyhow::{bail, Context, Result}; -use hubtools::{Caboose, RawHubrisArchive}; - -fn main() -> Result<()> { - let mut args = std::env::args().skip(1); - match args.next().context("subcommand required")?.as_str() { - "read-board" => { - let caboose = read_caboose(args.next())?; - println!("{}", std::str::from_utf8(caboose.board()?)?); - Ok(()) - } - "read-version" => { - let caboose = read_caboose(args.next())?; - println!("{}", std::str::from_utf8(caboose.version()?)?); - Ok(()) - } - unknown => bail!("unknown command {}", unknown), - } -} - -fn read_caboose(path: Option<String>) -> Result<Caboose> { - let archive = RawHubrisArchive::load( - &path.context("path to hubris archive required")?, - )?; - Ok(archive.read_caboose()?) -} diff --git a/dev-tools/releng/Cargo.toml b/dev-tools/releng/Cargo.toml new file mode 100644 index 0000000000..19ede6c24d --- /dev/null +++ b/dev-tools/releng/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "omicron-releng" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +camino-tempfile.workspace = true +cargo_metadata.workspace = true +chrono.workspace = true +clap.workspace = true +fs-err = { workspace = true, features = ["tokio"] } +futures.workspace = true +hex.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true +omicron-zone-package.workspace = true +once_cell.workspace = true +reqwest.workspace = true +semver.workspace = true +serde.workspace = true +sha2.workspace = true +shell-words.workspace = true +slog.workspace = true +slog-async.workspace = true +slog-term.workspace = true +tar.workspace = true +tokio = { workspace = true, features = ["full"] } +toml.workspace = true +tufaceous-lib.workspace = true + +[lints] +workspace = true diff --git a/dev-tools/releng/src/cmd.rs b/dev-tools/releng/src/cmd.rs new file mode 100644 index 0000000000..198eabf99f --- /dev/null +++ b/dev-tools/releng/src/cmd.rs @@ -0,0 +1,167 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+ +use std::ffi::OsStr; +use std::path::Path; +use std::process::ExitStatus; +use std::process::Output; +use std::process::Stdio; +use std::time::Instant; + +use anyhow::ensure; +use anyhow::Context; +use anyhow::Result; +use slog::debug; +use slog::Logger; + +/// Wrapper for `tokio::process::Command` where the builder methods take/return +/// `self`, plus a number of convenience methods. +pub(crate) struct Command { + inner: tokio::process::Command, +} + +impl Command { + pub(crate) fn new(program: impl AsRef<OsStr>) -> Command { + Command { inner: tokio::process::Command::new(program) } + } + + pub(crate) fn arg(mut self, arg: impl AsRef<OsStr>) -> Command { + self.inner.arg(arg); + self + } + + pub(crate) fn args( + mut self, + args: impl IntoIterator<Item = impl AsRef<OsStr>>, + ) -> Command { + self.inner.args(args); + self + } + + pub(crate) fn current_dir(mut self, dir: impl AsRef<Path>) -> Command { + self.inner.current_dir(dir); + self + } + + pub(crate) fn env( + mut self, + key: impl AsRef<OsStr>, + value: impl AsRef<OsStr>, + ) -> Command { + self.inner.env(key, value); + self + } + + pub(crate) fn env_remove(mut self, key: impl AsRef<OsStr>) -> Command { + self.inner.env_remove(key); + self + } + + pub(crate) async fn is_success(mut self, logger: &Logger) -> Result<bool> { + self.inner + .stdin(Stdio::null()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + Ok(xtrace(&mut self, logger).await?.status.success()) + } + + pub(crate) async fn ensure_success( + mut self, + logger: &Logger, + ) -> Result<()> { + self.inner + .stdin(Stdio::null()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + let status = xtrace(&mut self, logger).await?.status; + check_status(self, status) + } + + pub(crate) async fn ensure_stdout( + mut self, + logger: &Logger, + ) -> Result<String> { + self.inner + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::inherit()); + let output = xtrace(&mut self, logger).await?; + check_status(self, output.status)?; + String::from_utf8(output.stdout).context("command stdout was not UTF-8") + } + + pub(crate) fn into_parts(self) -> (Description, tokio::process::Command) { + (Description { str: self.to_string() }, self.inner) + } +} + +impl std::fmt::Display for Command { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let command = self.inner.as_std(); + for (name, value) in command.get_envs() { + if let Some(value) = value { + write!( + f, + "{}={} ", + shell_words::quote(&name.to_string_lossy()), + shell_words::quote(&value.to_string_lossy()) + )?; + } + } + write!( + f, + "{}", + shell_words::quote(&command.get_program().to_string_lossy()) + )?; + for arg in command.get_args() { + write!(f, " {}", shell_words::quote(&arg.to_string_lossy()))?; + } + Ok(()) + } +} + +/// Returned from [`Command::into_parts`] for use in the `job` module.
+pub(crate) struct Description { + str: String, +} + +impl Description { + pub(crate) fn check_status(&self, status: ExitStatus) -> Result<()> { + check_status(self, status) + } +} + +impl std::fmt::Display for Description { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.str) + } +} + +fn check_status( + command: impl std::fmt::Display, + status: ExitStatus, +) -> Result<()> { + ensure!(status.success(), "command `{}` exited with {}", command, status); + Ok(()) +} + +async fn xtrace(command: &mut Command, logger: &Logger) -> Result<Output> { + command.inner.stdin(Stdio::null()).kill_on_drop(true); + debug!(logger, "running: {}", command); + let start = Instant::now(); + let output = command + .inner + .spawn() + .with_context(|| format!("failed to exec `{}`", command))? + .wait_with_output() + .await + .with_context(|| format!("failed to wait on `{}`", command))?; + debug!( + logger, + "process exited with {} ({:?})", + output.status, + Instant::now().saturating_duration_since(start) + ); + Ok(output) +} diff --git a/dev-tools/releng/src/hubris.rs b/dev-tools/releng/src/hubris.rs new file mode 100644 index 0000000000..685a729a9f --- /dev/null +++ b/dev-tools/releng/src/hubris.rs @@ -0,0 +1,148 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::BTreeMap; +use std::collections::HashMap; + +use anyhow::Context; +use anyhow::Result; +use camino::Utf8PathBuf; +use fs_err::tokio as fs; +use futures::future::TryFutureExt; +use omicron_common::api::external::SemverVersion; +use omicron_common::api::internal::nexus::KnownArtifactKind; +use semver::Version; +use serde::Deserialize; +use tufaceous_lib::assemble::DeserializedArtifactData; +use tufaceous_lib::assemble::DeserializedArtifactSource; +use tufaceous_lib::assemble::DeserializedFileArtifactSource; +use tufaceous_lib::assemble::DeserializedManifest; + +pub(crate) async fn fetch_hubris_artifacts( + base_url: &'static str, + client: reqwest::Client, + manifest_list: Utf8PathBuf, + output_dir: Utf8PathBuf, +) -> Result<()> { + macro_rules! zip { + ($expr:expr) => { + output_dir.join(format!("{}.zip", $expr)) + }; + } + + fs::create_dir_all(&output_dir).await?; + + // This could be parallelized with FuturesUnordered but in practice this + // takes less time than OS builds.
+ + let mut manifest = DeserializedManifest { + system_version: SemverVersion(Version::new(0, 0, 0)), + artifacts: BTreeMap::new(), + }; + + for line in fs::read_to_string(manifest_list).await?.lines() { + if let Some(hash) = line.split_whitespace().next() { + let data = fetch_hash(base_url, &client, hash).await?; + let str = String::from_utf8(data).with_context(|| { + format!("hubris artifact manifest {} was not UTF-8", hash) + })?; + let hash_manifest: Manifest = + toml::from_str(&str).with_context(|| { + format!( + "failed to deserialize hubris artifact manifest {}", + hash + ) + })?; + for (kind, artifacts) in hash_manifest.artifacts { + for artifact in artifacts { + let (source, hashes) = match artifact.source { + Source::File(file) => ( + DeserializedArtifactSource::File { + path: zip!(file.hash), + }, + vec![file.hash], + ), + Source::CompositeRot { archive_a, archive_b } => ( + DeserializedArtifactSource::CompositeRot { + archive_a: + DeserializedFileArtifactSource::File { + path: zip!(archive_a.hash), + }, + archive_b: + DeserializedFileArtifactSource::File { + path: zip!(archive_b.hash), + }, + }, + vec![archive_a.hash, archive_b.hash], + ), + }; + manifest.artifacts.entry(kind).or_default().push( + DeserializedArtifactData { + name: artifact.name, + version: artifact.version, + source, + }, + ); + for hash in hashes { + let data = fetch_hash(base_url, &client, &hash).await?; + fs::write(output_dir.join(zip!(hash)), data).await?; + } + } + } + } + } + + fs::write( + output_dir.join("manifest.toml"), + toml::to_string_pretty(&manifest)?.into_bytes(), + ) + .await?; + Ok(()) +} + +async fn fetch_hash( + base_url: &'static str, + client: &reqwest::Client, + hash: &str, +) -> Result<Vec<u8>> { + client + .get(format!("{}/artifact/{}", base_url, hash)) + .send() + .and_then(|response| response.json()) + .await + .with_context(|| { + format!( + "failed to fetch hubris artifact {} from {}", + hash, base_url + ) + }) +} + +// These structs are similar to `DeserializedManifest` and friends from +// tufaceous-lib, except that the source is a hash instead of a file path. This +// hash is used to download the artifact from Permission Slip. +#[derive(Deserialize)] +struct Manifest { + #[serde(rename = "artifact")] + artifacts: HashMap<KnownArtifactKind, Vec<Artifact>>, +} + +#[derive(Deserialize)] +struct Artifact { + name: String, + version: SemverVersion, + source: Source, +} + +#[derive(Deserialize)] +#[serde(tag = "kind", rename_all = "kebab-case")] +enum Source { + File(FileSource), + CompositeRot { archive_a: FileSource, archive_b: FileSource }, +} + +#[derive(Deserialize)] +struct FileSource { + hash: String, +} diff --git a/dev-tools/releng/src/job.rs b/dev-tools/releng/src/job.rs new file mode 100644 index 0000000000..dcb58a0b92 --- /dev/null +++ b/dev-tools/releng/src/job.rs @@ -0,0 +1,305 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A quick-and-dirty job runner. +//! +//! Jobs are async functions given a name. All jobs must be described before the +//! jobs can be run (`Jobs::run_all` consumes the job runner). Jobs can depend +//! on other jobs, which is implemented via `tokio::sync::oneshot` channels; a +//! completed job sends a message to all registered receivers, which are waiting +//! on the messages in order to run. This essentially creates a DAG, except +//! instead of us having to keep track of it, we make it Tokio's problem. +//! +//!
A `tokio::sync::Semaphore` is used to restrict the number of jobs to +//! `std::thread::available_parallelism`, except for a hardcoded list of +//! prioritized job names that are allowed to ignore this. + +use std::collections::HashMap; +use std::future::Future; +use std::process::Stdio; +use std::sync::Arc; +use std::time::Instant; + +use anyhow::anyhow; +use anyhow::Context; +use anyhow::Result; +use camino::Utf8Path; +use camino::Utf8PathBuf; +use fs_err::tokio::File; +use futures::future::BoxFuture; +use futures::future::FutureExt; +use futures::stream::FuturesUnordered; +use futures::stream::TryStreamExt; +use slog::info; +use slog::Logger; +use tokio::io::AsyncBufReadExt; +use tokio::io::AsyncRead; +use tokio::io::AsyncWrite; +use tokio::io::AsyncWriteExt; +use tokio::io::BufReader; +use tokio::sync::oneshot; +use tokio::sync::oneshot::error::RecvError; +use tokio::sync::Semaphore; + +use crate::cmd::Command; + +// We want these two jobs to run without delay because they take the longest +// amount of time, so we allow them to run without taking a permit first. +const PERMIT_NOT_REQUIRED: [&str; 2] = ["host-package", "host-image"]; + +pub(crate) struct Jobs { + logger: Logger, + permits: Arc<Semaphore>, + log_dir: Utf8PathBuf, + map: HashMap<String, Job>, +} + +struct Job { + future: BoxFuture<'static, Result<()>>, + wait_for: Vec<oneshot::Receiver<()>>, + notify: Vec<oneshot::Sender<()>>, +} + +pub(crate) struct Selector<'a> { + jobs: &'a mut Jobs, + name: String, +} + +impl Jobs { + pub(crate) fn new( + logger: &Logger, + permits: Arc<Semaphore>, + log_dir: &Utf8Path, + ) -> Jobs { + Jobs { + logger: logger.clone(), + permits, + log_dir: log_dir.to_owned(), + map: HashMap::new(), + } + } + + pub(crate) fn push( + &mut self, + name: impl AsRef<str>, + future: impl Future<Output = Result<()>> + Send + 'static, + ) -> Selector<'_> { + let name = name.as_ref().to_owned(); + assert!(!self.map.contains_key(&name), "duplicate job name {}", name); + self.map.insert( + name.clone(), + Job { + future: run_job( + self.logger.clone(), + self.permits.clone(), + name.clone(), + future, + ) + .boxed(), + wait_for: Vec::new(), + notify: Vec::new(), + }, + ); + Selector { jobs: self, name } + } + + pub(crate) fn push_command( + &mut self, + name: impl AsRef<str>, + command: Command, + ) -> Selector<'_> { + let name = name.as_ref().to_owned(); + assert!(!self.map.contains_key(&name), "duplicate job name {}", name); + self.map.insert( + name.clone(), + Job { + future: spawn_with_output( + command, + self.logger.clone(), + self.permits.clone(), + name.clone(), + self.log_dir.join(&name).with_extension("log"), + ) + .boxed(), + wait_for: Vec::new(), + notify: Vec::new(), + }, + ); + Selector { jobs: self, name } + } + + pub(crate) fn select(&mut self, name: impl AsRef<str>) -> Selector<'_> { + Selector { jobs: self, name: name.as_ref().to_owned() } + } + + pub(crate) async fn run_all(self) -> Result<()> { + self.map + .into_values() + .map(Job::run) + .collect::<FuturesUnordered<_>>() + .try_collect::<()>() + .await + } +} + +impl Job { + async fn run(self) -> Result<()> { + let result: Result<(), RecvError> = self + .wait_for + .into_iter() + .collect::<FuturesUnordered<_>>() + .try_collect::<()>() + .await; + result.map_err(|_| anyhow!("dependency failed"))?; + + self.future.await?; + for sender in self.notify { + // Ignore the error here -- the only reason we should fail to send + // our message is if a task has failed or the user hit Ctrl-C, at + // which point a bunch of error logging is not particularly useful.
+ sender.send(()).ok(); + } + Ok(()) + } +} + +impl<'a> Selector<'a> { + #[track_caller] + pub(crate) fn after(self, other: impl AsRef<str>) -> Self { + let (sender, receiver) = oneshot::channel(); + self.jobs + .map + .get_mut(&self.name) + .expect("invalid job name") + .wait_for + .push(receiver); + self.jobs + .map + .get_mut(other.as_ref()) + .expect("invalid job name") + .notify + .push(sender); + self + } +} + +macro_rules! info_or_error { + ($logger:expr, $result:expr, $($tt:tt)*) => { + if $result.is_ok() { + ::slog::info!($logger, $($tt)*); + } else { + ::slog::error!($logger, $($tt)*); + } + }; } + +async fn run_job( + logger: Logger, + permits: Arc<Semaphore>, + name: String, + future: impl Future<Output = Result<()>> + Send + 'static, +) -> Result<()> { + // Hold the permit until the task completes so the semaphore actually + // bounds concurrency (`let _ = ...` would drop the permit immediately). + let _permit = if PERMIT_NOT_REQUIRED.contains(&name.as_str()) { + None + } else { + Some(permits.acquire_owned().await?) + }; + + info!(logger, "[{}] running task", name); + let start = Instant::now(); + let result = tokio::spawn(future).await?; + let duration = Instant::now().saturating_duration_since(start); + info_or_error!( + logger, + result, + "[{}] task {} ({:?})", + name, + if result.is_ok() { "succeeded" } else { "failed" }, + duration + ); + result +} + +async fn spawn_with_output( + command: Command, + logger: Logger, + permits: Arc<Semaphore>, + name: String, + log_path: Utf8PathBuf, +) -> Result<()> { + // Hold the permit until the process exits, for the same reason as above. + let _permit = if PERMIT_NOT_REQUIRED.contains(&name.as_str()) { + None + } else { + Some(permits.acquire_owned().await?) + }; + + let (command_desc, mut command) = command.into_parts(); + + let log_file_1 = File::create(log_path).await?; + let log_file_2 = log_file_1.try_clone().await?; + + info!(logger, "[{}] running: {}", name, command_desc); + let start = Instant::now(); + let mut child = command + .kill_on_drop(true) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .with_context(|| format!("failed to exec `{}`", command_desc))?; + + let stdout = spawn_reader( + format!("[{:>16}] ", name), + child.stdout.take().unwrap(), + tokio::io::stdout(), + log_file_1, + ); + let stderr = spawn_reader( + format!("[{:>16}] ", name), + child.stderr.take().unwrap(), + tokio::io::stderr(), + log_file_2, + ); + + let status = child.wait().await.with_context(|| { + format!("I/O error while waiting for job {:?} to complete", name) + })?; + let result = command_desc.check_status(status); + info_or_error!( + logger, + result, + "[{}] process exited with {} ({:?})", + name, + status, + Instant::now().saturating_duration_since(start) + ); + + // bubble up any errors from `spawn_reader` + stdout.await??; + stderr.await??; + + result +} + +fn spawn_reader( + prefix: String, + reader: impl AsyncRead + Send + Unpin + 'static, + mut terminal_writer: impl AsyncWrite + Send + Unpin + 'static, + logfile_writer: File, +) -> tokio::task::JoinHandle<std::io::Result<()>> { + let mut reader = BufReader::new(reader); + let mut logfile_writer = tokio::fs::File::from(logfile_writer); + let mut buf = prefix.into_bytes(); + let prefix_len = buf.len(); + tokio::spawn(async move { + loop { + buf.truncate(prefix_len); + // We have no particular control over the output from the child + // processes we run, so we read until a newline character without + // relying on valid UTF-8 output.
+ let size = reader.read_until(b'\n', &mut buf).await?; + if size == 0 { + return Ok(()); + } + terminal_writer.write_all(&buf).await?; + logfile_writer.write_all(&buf[prefix_len..]).await?; + } + }) +} diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs new file mode 100644 index 0000000000..0fa4382931 --- /dev/null +++ b/dev-tools/releng/src/main.rs @@ -0,0 +1,734 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod cmd; +mod hubris; +mod job; +mod tuf; + +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use anyhow::bail; +use anyhow::Context; +use anyhow::Result; +use camino::Utf8PathBuf; +use chrono::Utc; +use clap::Parser; +use fs_err::tokio as fs; +use omicron_zone_package::config::Config; +use once_cell::sync::Lazy; +use semver::Version; +use slog::debug; +use slog::error; +use slog::info; +use slog::Drain; +use slog::Logger; +use slog_term::FullFormat; +use slog_term::TermDecorator; +use tokio::sync::Semaphore; + +use crate::cmd::Command; +use crate::job::Jobs; + +/// The base version we're currently building. Build information is appended to +/// this later on. +/// +/// Under current policy, each new release is a major version bump, and +/// generally referred to only by the major version (e.g. 8.0.0 is referred +/// to as "v8", "version 8", or "release 8" to customers). The use of semantic +/// versioning is mostly to hedge for perhaps wanting something more granular in +/// the future. +const BASE_VERSION: Version = Version::new(8, 0, 0); + +#[derive(Debug, Clone, Copy)] +enum InstallMethod { + /// Unpack the tarball to `/opt/oxide/<service-name>`, and install + /// `pkg/manifest.xml` (if it exists) to + /// `/lib/svc/manifest/site/<service-name>.xml`. + Install, + /// Copy the tarball to `/opt/oxide/<service-name>.tar.gz`. + Bundle, +} + +/// Packages to install or bundle in the host OS image. +const HOST_IMAGE_PACKAGES: [(&str, InstallMethod); 7] = [ + ("mg-ddm-gz", InstallMethod::Install), + ("omicron-sled-agent", InstallMethod::Install), + ("overlay", InstallMethod::Bundle), + ("oxlog", InstallMethod::Install), + ("propolis-server", InstallMethod::Bundle), + ("pumpkind-gz", InstallMethod::Install), + ("switch-asic", InstallMethod::Bundle), +]; +/// Packages to install or bundle in the recovery (trampoline) OS image. +const RECOVERY_IMAGE_PACKAGES: [(&str, InstallMethod); 2] = [ + ("installinator", InstallMethod::Install), + ("mg-ddm-gz", InstallMethod::Install), +]; +/// Packages to ship with the TUF repo. +const TUF_PACKAGES: [&str; 11] = [ + "clickhouse_keeper", + "clickhouse", + "cockroachdb", + "crucible-pantry-zone", + "crucible-zone", + "external-dns", + "internal-dns", + "nexus", + "ntp", + "oximeter", + "probe", +]; + +const HELIOS_REPO: &str = "https://pkg.oxide.computer/helios/2/dev/"; + +static WORKSPACE_DIR: Lazy<Utf8PathBuf> = Lazy::new(|| { + // $CARGO_MANIFEST_DIR is at `.../omicron/dev-tools/releng` + let mut dir = + Utf8PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").expect( + "$CARGO_MANIFEST_DIR is not set; run this via `cargo xtask releng`", + )); + dir.pop(); + dir.pop(); + dir +}); + +#[derive(Parser)] +/// Run the Oxide release engineering process and produce a TUF repo that can be +/// used to update a rack. +/// +/// For more information, see `docs/releng.adoc` in the Omicron repository.
+/// +/// Note that `--host-dataset` and `--recovery-dataset` must be set to different +/// values to build the two OS images in parallel. This is strongly recommended. +struct Args { + /// ZFS dataset to use for `helios-build` when building the host image + #[clap(long, default_value_t = Self::default_dataset("host"))] + host_dataset: String, + + /// ZFS dataset to use for `helios-build` when building the recovery + /// (trampoline) image + #[clap(long, default_value_t = Self::default_dataset("recovery"))] + recovery_dataset: String, + + /// Path to a Helios repository checkout (default: "helios" in the same + /// directory as "omicron") + #[clap(long, default_value_t = Self::default_helios_dir())] + helios_dir: Utf8PathBuf, + + /// Ignore the current HEAD of the Helios repository checkout + #[clap(long)] + ignore_helios_origin: bool, + + /// Output dir for TUF repo and log files + #[clap(long, default_value_t = Self::default_output_dir())] + output_dir: Utf8PathBuf, + + /// Path to the directory containing the rustup proxy `bin/cargo` (usually + /// set by Cargo) + #[clap(long, env = "CARGO_HOME")] + cargo_home: Option<Utf8PathBuf>, + + /// Path to the git binary + #[clap(long, env = "GIT", default_value = "git")] + git_bin: Utf8PathBuf, + + /// Path to a pre-built omicron-package binary (skips building if set) + #[clap(long, env = "OMICRON_PACKAGE")] + omicron_package_bin: Option<Utf8PathBuf>, +} + +impl Args { + fn default_dataset(name: &str) -> String { + format!( + "rpool/images/{}/{}", + std::env::var("LOGNAME").expect("$LOGNAME is not set"), + name + ) + } + + fn default_helios_dir() -> Utf8PathBuf { + WORKSPACE_DIR + .parent() + .expect("omicron is presumably not cloned at /") + .join("helios") + } + + fn default_output_dir() -> Utf8PathBuf { + WORKSPACE_DIR.join("out/releng") + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let args = Args::parse(); + + let decorator = TermDecorator::new().build(); + let drain = FullFormat::new(decorator).build().fuse(); + let drain = slog_async::Async::new(drain).build().fuse(); + let logger = Logger::root(drain, slog::o!()); + + // Change the working directory to the workspace root. + debug!(logger, "changing working directory to {}", *WORKSPACE_DIR); + std::env::set_current_dir(&*WORKSPACE_DIR) + .context("failed to change working directory to workspace root")?; + + // Determine the target directory. + let target_dir = cargo_metadata::MetadataCommand::new() + .no_deps() + .exec() + .context("failed to get cargo metadata")? + .target_directory; + + // We build everything in Omicron with $CARGO, but we need to use the rustup + // proxy for Cargo when outside Omicron. + let rustup_cargo = match &args.cargo_home { + Some(path) => path.join("bin/cargo"), + None => Utf8PathBuf::from("cargo"), + }; + // `var_os` here is deliberate: if CARGO is set to a non-UTF-8 path we + // shouldn't do something confusing as a fallback. + let cargo = match std::env::var_os("CARGO") { + Some(path) => Utf8PathBuf::try_from(std::path::PathBuf::from(path)) + .context("$CARGO is not valid UTF-8")?, + None => rustup_cargo.clone(), + }; + + let permits = Arc::new(Semaphore::new( + std::thread::available_parallelism() + .context("couldn't get available parallelism")? + .into(), + )); + + let commit = Command::new(&args.git_bin) + .args(["rev-parse", "HEAD"]) + .ensure_stdout(&logger) + .await? + .trim() + .to_owned(); + + let mut version = BASE_VERSION.clone(); + // Differentiate between CI and local builds. We use `0.word` as the + // prerelease field because it comes before `alpha`.
+ version.pre = + if std::env::var_os("CI").is_some() { "0.ci" } else { "0.local" } + .parse()?; + // Set the build metadata to the current commit hash. + let mut build = String::with_capacity(14); + build.push_str("git"); + build.extend(commit.chars().take(11)); + version.build = build.parse()?; + let version_str = version.to_string(); + info!(logger, "version: {}", version_str); + + let manifest = Arc::new(omicron_zone_package::config::parse_manifest( + &fs::read_to_string(WORKSPACE_DIR.join("package-manifest.toml")) + .await?, + )?); + let opte_version = + fs::read_to_string(WORKSPACE_DIR.join("tools/opte_version")).await?; + + let client = reqwest::ClientBuilder::new() + .connect_timeout(Duration::from_secs(15)) + .timeout(Duration::from_secs(15)) + .build() + .context("failed to build reqwest client")?; + + // PREFLIGHT ============================================================== + let mut preflight_ok = true; + + for package in HOST_IMAGE_PACKAGES + .into_iter() + .chain(RECOVERY_IMAGE_PACKAGES) + .map(|(package, _)| package) + .chain(TUF_PACKAGES) + { + if !manifest.packages.contains_key(package) { + error!( + logger, + "package {} to be installed in the OS image \ + is not listed in the package manifest", + package + ); + preflight_ok = false; + } + } + + // Ensure the Helios checkout exists + if args.helios_dir.exists() { + if !args.ignore_helios_origin { + // check that our helios clone is up to date + Command::new(&args.git_bin) + .arg("-C") + .arg(&args.helios_dir) + .args(["fetch", "--no-write-fetch-head", "origin", "master"]) + .ensure_success(&logger) + .await?; + let stdout = Command::new(&args.git_bin) + .arg("-C") + .arg(&args.helios_dir) + .args(["rev-parse", "HEAD", "origin/master"]) + .ensure_stdout(&logger) + .await?; + let mut lines = stdout.lines(); + let first = + lines.next().context("git-rev-parse output was empty")?; + if !lines.all(|line| line == first) { + error!( + logger, + "helios checkout at {0} is out-of-date; run \ + `git -C {0} pull`, or run omicron-releng with \ + --ignore-helios-origin or --helios-dir", + shell_words::quote(args.helios_dir.as_str()) + ); + preflight_ok = false; + } + } + } else { + info!(logger, "cloning helios to {}", args.helios_dir); + Command::new(&args.git_bin) + .args(["clone", "https://github.com/oxidecomputer/helios.git"]) + .arg(&args.helios_dir) + .ensure_success(&logger) + .await?; + } + // Record the branch and commit in the output + Command::new(&args.git_bin) + .arg("-C") + .arg(&args.helios_dir) + .args(["status", "--branch", "--porcelain=2"]) + .ensure_success(&logger) + .await?; + + // Check that the omicron1 brand is installed + if !Command::new("pkg") + .args(["verify", "-q", "/system/zones/brand/omicron1/tools"]) + .is_success(&logger) + .await? + { + error!( + logger, + "the omicron1 brand is not installed; install it with \ + `pfexec pkg install /system/zones/brand/omicron1/tools`" + ); + preflight_ok = false; + } + + // Check that the datasets for helios-image to use exist + for (dataset, option) in [ + (&args.host_dataset, "--host-dataset"), + (&args.recovery_dataset, "--recovery-dataset"), + ] { + if !Command::new("zfs") + .arg("list") + .arg(dataset) + .is_success(&logger) + .await?
+ { + error!( + logger, + "the dataset {0} does not exist; run `pfexec zfs create \ + -p {0}`, or specify a different one with {1}", + shell_words::quote(dataset), + option + ); + preflight_ok = false; + } + } + + if !preflight_ok { + bail!("some preflight checks failed"); + } + + fs::create_dir_all(&args.output_dir).await?; + + // DEFINE JOBS ============================================================ + let tempdir = camino_tempfile::tempdir() + .context("failed to create temporary directory")?; + let mut jobs = Jobs::new(&logger, permits.clone(), &args.output_dir); + + jobs.push_command( + "helios-setup", + Command::new("ptime") + .args(["-m", "gmake", "setup"]) + .current_dir(&args.helios_dir) + // ?!?! + // somehow, the Makefile does not see a new `$(PWD)` without this. + .env("PWD", &args.helios_dir) + // Setting `BUILD_OS` to no makes setup skip repositories we don't + // need for building the OS itself (we are just building an image + // from an already-built OS). + .env("BUILD_OS", "no") + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"), + ); + + // Download the toolchain for phbl before we get to the image build steps. + // (This is possibly a micro-optimization.) + jobs.push_command( + "phbl-toolchain", + Command::new(&rustup_cargo) + .arg("--version") + .current_dir(args.helios_dir.join("projects/phbl")) + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"), + ) + .after("helios-setup"); + + let omicron_package = if let Some(path) = &args.omicron_package_bin { + // omicron-package is provided, so don't build it. + jobs.push("omicron-package", std::future::ready(Ok(()))); + path.clone() + } else { + jobs.push_command( + "omicron-package", + Command::new("ptime").args([ + "-m", + cargo.as_str(), + "build", + "--locked", + "--release", + "--bin", + "omicron-package", + ]), + ); + target_dir.join("release/omicron-package") + }; + + // Generate `omicron-package stamp` jobs for a list of packages as a nested + // `Jobs`. Returns the selector for the outer job. + // + // (This could be a function but the resulting function would have too many + // confusable arguments.) + macro_rules! 
stamp_packages { + ($name:expr, $target:expr, $packages:expr) => {{ + let mut stamp_jobs = + Jobs::new(&logger, permits.clone(), &args.output_dir); + for package in $packages { + stamp_jobs.push_command( + format!("stamp-{}", package), + Command::new(&omicron_package) + .args([ + "--target", + $target.as_str(), + "--artifacts", + $target.artifacts_path(&args).as_str(), + "stamp", + package, + &version_str, + ]) + .env_remove("CARGO_MANIFEST_DIR"), + ); + } + jobs.push($name, stamp_jobs.run_all()) + }}; + } + + for target in [Target::Host, Target::Recovery] { + let artifacts_path = target.artifacts_path(&args); + + // omicron-package target create + jobs.push_command( + format!("{}-target", target), + Command::new(&omicron_package) + .args([ + "--target", + target.as_str(), + "--artifacts", + artifacts_path.as_str(), + "target", + "create", + ]) + .args(target.target_args()) + .env_remove("CARGO_MANIFEST_DIR"), + ) + .after("omicron-package"); + + // omicron-package package + jobs.push_command( + format!("{}-package", target), + Command::new(&omicron_package) + .args([ + "--target", + target.as_str(), + "--artifacts", + artifacts_path.as_str(), + "package", + ]) + .env_remove("CARGO_MANIFEST_DIR"), + ) + .after(format!("{}-target", target)); + + // omicron-package stamp + stamp_packages!( + format!("{}-stamp", target), + target, + target.proto_package_names() + ) + .after(format!("{}-package", target)); + + // [build proto dir, to be overlaid into disk image] + let proto_dir = tempdir.path().join("proto").join(target.as_str()); + jobs.push( + format!("{}-proto", target), + build_proto_area( + artifacts_path, + proto_dir.clone(), + target.proto_packages(), + manifest.clone(), + ), + ) + .after(format!("{}-stamp", target)); + + // The ${os_short_commit} token will be expanded by `helios-build` + let image_name = format!( + "{} {}/${{os_short_commit}} {}", + target.image_prefix(), + commit.chars().take(7).collect::<String>(), + Utc::now().format("%Y-%m-%d %H:%M") + ); + + // helios-build experiment-image + jobs.push_command( + format!("{}-image", target), + Command::new("ptime") + .arg("-m") + .arg(args.helios_dir.join("helios-build")) + .arg("experiment-image") + .arg("-o") // output directory for image + .arg(args.output_dir.join(format!("os-{}", target))) + .arg("-p") // use an external package repository + .arg(format!("helios-dev={}", HELIOS_REPO)) + .arg("-F") // pass extra image builder features + .arg(format!("optever={}", opte_version.trim())) + .arg("-P") // include all files from extra proto area + .arg(proto_dir.join("root")) + .arg("-N") // image name + .arg(image_name) + .arg("-s") // tempdir name suffix + .arg(target.as_str()) + .args(target.image_build_args()) + .current_dir(&args.helios_dir) + .env( + "IMAGE_DATASET", + match target { + Target::Host => &args.host_dataset, + Target::Recovery => &args.recovery_dataset, + }, + ) + .env_remove("CARGO") + .env_remove("RUSTUP_TOOLCHAIN"), + ) + .after("helios-setup") + .after(format!("{}-proto", target)); + } + // Build the recovery target after we build the host target. Only one + // of these will build at a time since Cargo locks its target directory; + // since host-package and host-image both take longer than their recovery + // counterparts, this should be the fastest option to go first. + jobs.select("recovery-package").after("host-package"); + if args.host_dataset == args.recovery_dataset { + // If the datasets are the same, we can't parallelize these.
+ jobs.select("recovery-image").after("host-image"); + } + + // Set up /root/.profile in the host OS image. + jobs.push( + "host-profile", + host_add_root_profile(tempdir.path().join("proto/host/root/root")), + ) + .after("host-proto"); + jobs.select("host-image").after("host-profile"); + + stamp_packages!("tuf-stamp", Target::Host, TUF_PACKAGES) + .after("host-stamp"); + + for (name, base_url) in [ + ("staging", "https://permslip-staging.corp.oxide.computer"), + ("production", "https://signer-us-west.corp.oxide.computer"), + ] { + jobs.push( + format!("hubris-{}", name), + hubris::fetch_hubris_artifacts( + base_url, + client.clone(), + WORKSPACE_DIR.join(format!("tools/permslip_{}", name)), + args.output_dir.join(format!("hubris-{}", name)), + ), + ); + } + + jobs.push( + "tuf-repo", + tuf::build_tuf_repo( + logger.clone(), + args.output_dir.clone(), + version, + manifest, + ), + ) + .after("tuf-stamp") + .after("host-image") + .after("recovery-image") + .after("hubris-staging") + .after("hubris-production"); + + // RUN JOBS =============================================================== + let start = Instant::now(); + jobs.run_all().await?; + info!( + logger, + "all jobs completed in {:?}", + Instant::now().saturating_duration_since(start) + ); + Ok(()) +} + +#[derive(Clone, Copy)] +enum Target { + Host, + Recovery, +} + +impl Target { + fn as_str(self) -> &'static str { + match self { + Target::Host => "host", + Target::Recovery => "recovery", + } + } + + fn artifacts_path(self, args: &Args) -> Utf8PathBuf { + match self { + Target::Host => WORKSPACE_DIR.join("out"), + Target::Recovery => { + args.output_dir.join(format!("artifacts-{}", self)) + } + } + } + + fn target_args(self) -> &'static [&'static str] { + match self { + Target::Host => &[ + "--image", + "standard", + "--machine", + "gimlet", + "--switch", + "asic", + "--rack-topology", + "multi-sled", + ], + Target::Recovery => &["--image", "trampoline"], + } + } + + fn proto_packages(self) -> &'static [(&'static str, InstallMethod)] { + match self { + Target::Host => &HOST_IMAGE_PACKAGES, + Target::Recovery => &RECOVERY_IMAGE_PACKAGES, + } + } + + fn proto_package_names(self) -> impl Iterator<Item = &'static str> { + self.proto_packages().iter().map(|(name, _)| *name) + } + + fn image_prefix(self) -> &'static str { + match self { + Target::Host => "ci", + Target::Recovery => "recovery", + } + } + + fn image_build_args(self) -> &'static [&'static str] { + match self { + Target::Host => &[ + "-B", // include omicron1 brand + ], + Target::Recovery => &[ + "-R", // recovery image + ], + } + } +} + +impl std::fmt::Display for Target { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +async fn build_proto_area( + mut package_dir: Utf8PathBuf, + proto_dir: Utf8PathBuf, + packages: &'static [(&'static str, InstallMethod)], + manifest: Arc<Config>, +) -> Result<()> { + let opt_oxide = proto_dir.join("root/opt/oxide"); + let manifest_site = proto_dir.join("root/lib/svc/manifest/site"); + fs::create_dir_all(&opt_oxide).await?; + + // use the stamped packages + package_dir.push("versioned"); + + for &(package_name, method) in packages { + let package = + manifest.packages.get(package_name).expect("checked in preflight"); + match method { + InstallMethod::Install => { + let path = opt_oxide.join(&package.service_name); + fs::create_dir(&path).await?; + + let cloned_path = path.clone(); + let cloned_package_dir = package_dir.to_owned(); + tokio::task::spawn_blocking(move || -> Result<()> { + let mut archive =
+                        tar::Archive::new(std::fs::File::open(
+                            cloned_package_dir
+                                .join(package_name)
+                                .with_extension("tar"),
+                        )?);
+                    archive.unpack(cloned_path).with_context(|| {
+                        format!("failed to extract {}.tar", package_name)
+                    })?;
+                    Ok(())
+                })
+                .await??;
+
+                let smf_manifest = path.join("pkg").join("manifest.xml");
+                if smf_manifest.exists() {
+                    fs::create_dir_all(&manifest_site).await?;
+                    fs::rename(
+                        smf_manifest,
+                        manifest_site
+                            .join(&package.service_name)
+                            .with_extension("xml"),
+                    )
+                    .await?;
+                }
+            }
+            InstallMethod::Bundle => {
+                fs::copy(
+                    package_dir.join(format!("{}.tar.gz", package_name)),
+                    opt_oxide.join(format!("{}.tar.gz", package.service_name)),
+                )
+                .await?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+async fn host_add_root_profile(host_proto_root: Utf8PathBuf) -> Result<()> {
+    fs::create_dir_all(&host_proto_root).await?;
+    fs::write(
+        host_proto_root.join(".profile"),
+        "# Add opteadm, ddmadm, oxlog to PATH\n\
+        export PATH=$PATH:/opt/oxide/opte/bin:/opt/oxide/mg-ddm:/opt/oxide/oxlog\n",
+    ).await?;
+    Ok(())
+}
diff --git a/dev-tools/releng/src/tuf.rs b/dev-tools/releng/src/tuf.rs
new file mode 100644
index 0000000000..2a880210eb
--- /dev/null
+++ b/dev-tools/releng/src/tuf.rs
@@ -0,0 +1,149 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use std::sync::Arc;
+
+use anyhow::Context;
+use anyhow::Result;
+use camino::Utf8PathBuf;
+use chrono::Duration;
+use chrono::Timelike;
+use chrono::Utc;
+use fs_err::tokio as fs;
+use fs_err::tokio::File;
+use omicron_common::api::external::SemverVersion;
+use omicron_common::api::internal::nexus::KnownArtifactKind;
+use omicron_zone_package::config::Config;
+use semver::Version;
+use sha2::Digest;
+use sha2::Sha256;
+use slog::Logger;
+use tokio::io::AsyncReadExt;
+use tufaceous_lib::assemble::ArtifactManifest;
+use tufaceous_lib::assemble::DeserializedArtifactData;
+use tufaceous_lib::assemble::DeserializedArtifactSource;
+use tufaceous_lib::assemble::DeserializedControlPlaneZoneSource;
+use tufaceous_lib::assemble::DeserializedManifest;
+use tufaceous_lib::assemble::OmicronRepoAssembler;
+use tufaceous_lib::Key;
+
+pub(crate) async fn build_tuf_repo(
+    logger: Logger,
+    output_dir: Utf8PathBuf,
+    version: Version,
+    package_manifest: Arc<Config>,
+) -> Result<()> {
+    // We currently go about this somewhat strangely; the old release
+    // engineering process produced a Tufaceous manifest, and (the now very many
+    // copies of) the TUF repo download-and-unpack script we use expects to be
+    // able to download a manifest. So we build up a `DeserializedManifest`,
+    // write it to disk, and then turn it into an `ArtifactManifest` to actually
+    // build the repo.
+
+    // Start a new manifest by loading the Hubris staging manifest.
+    let mut manifest = DeserializedManifest::from_path(
+        &output_dir.join("hubris-staging/manifest.toml"),
+    )
+    .context("failed to open intermediate hubris staging manifest")?;
+    // Set the version.
+    manifest.system_version = SemverVersion(version);
+
+    // Load the Hubris production manifest and merge it in.
+    let hubris_production = DeserializedManifest::from_path(
+        &output_dir.join("hubris-production/manifest.toml"),
+    )
+    .context("failed to open intermediate hubris production manifest")?;
+    for (kind, artifacts) in hubris_production.artifacts {
+        manifest.artifacts.entry(kind).or_default().extend(artifacts);
+    }
+
+    // Add the OS images.
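+    // As a rough sketch of what the entries below serialize to in
+    // manifest.toml (the exact TOML field layout is an assumption based on
+    // tufaceous-lib's manifest format, not something verified here):
+    //
+    //   [[artifact.host]]
+    //   name = "host"
+    //   version = "<system version>"
+    //   [artifact.host.source]
+    //   kind = "file"
+    //   path = "os-host/os.tar.gz"
+    //
+    // and likewise for the trampoline image.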
+    manifest.artifacts.insert(
+        KnownArtifactKind::Host,
+        vec![DeserializedArtifactData {
+            name: "host".to_string(),
+            version: manifest.system_version.clone(),
+            source: DeserializedArtifactSource::File {
+                path: output_dir.join("os-host/os.tar.gz"),
+            },
+        }],
+    );
+    manifest.artifacts.insert(
+        KnownArtifactKind::Trampoline,
+        vec![DeserializedArtifactData {
+            name: "trampoline".to_string(),
+            version: manifest.system_version.clone(),
+            source: DeserializedArtifactSource::File {
+                path: output_dir.join("os-recovery/os.tar.gz"),
+            },
+        }],
+    );
+
+    // Add the control plane zones.
+    let mut zones = Vec::new();
+    for package in crate::TUF_PACKAGES {
+        zones.push(DeserializedControlPlaneZoneSource::File {
+            file_name: Some(format!(
+                "{}.tar.gz",
+                package_manifest
+                    .packages
+                    .get(package)
+                    .expect("checked in preflight")
+                    .service_name
+            )),
+            path: crate::WORKSPACE_DIR
+                .join("out/versioned")
+                .join(format!("{}.tar.gz", package)),
+        });
+    }
+    manifest.artifacts.insert(
+        KnownArtifactKind::ControlPlane,
+        vec![DeserializedArtifactData {
+            name: "control-plane".to_string(),
+            version: manifest.system_version.clone(),
+            source: DeserializedArtifactSource::CompositeControlPlane { zones },
+        }],
+    );
+
+    // Serialize the manifest out.
+    fs::write(
+        output_dir.join("manifest.toml"),
+        toml::to_string_pretty(&manifest)?.into_bytes(),
+    )
+    .await?;
+
+    // Convert the manifest.
+    let manifest = ArtifactManifest::from_deserialized(&output_dir, manifest)?;
+    manifest.verify_all_present()?;
+    // Assemble the repo.
+    let keys = vec![Key::generate_ed25519()];
+    let expiry = Utc::now().with_nanosecond(0).unwrap() + Duration::weeks(1);
+    OmicronRepoAssembler::new(
+        &logger,
+        manifest,
+        keys,
+        expiry,
+        output_dir.join("repo.zip"),
+    )
+    .build()
+    .await?;
+    // Generate the checksum file.
+    let mut hasher = Sha256::new();
+    let mut buf = [0; 8192];
+    let mut file = File::open(output_dir.join("repo.zip")).await?;
+    loop {
+        let n = file.read(&mut buf).await?;
+        if n == 0 {
+            break;
+        }
+        hasher.update(&buf[..n]);
+    }
+    fs::write(
+        output_dir.join("repo.zip.sha256.txt"),
+        format!("{}\n", hex::encode(&hasher.finalize())),
+    )
+    .await?;
+
+    Ok(())
+}
diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml
index 11fcf405bd..2aecde57e5 100644
--- a/dev-tools/xtask/Cargo.toml
+++ b/dev-tools/xtask/Cargo.toml
@@ -11,7 +11,7 @@ workspace = true
 anyhow.workspace = true
 camino.workspace = true
 cargo_toml = "0.20"
-cargo_metadata = "0.18"
+cargo_metadata.workspace = true
 clap.workspace = true
 macaddr.workspace = true
 serde.workspace = true
diff --git a/docs/releng.adoc b/docs/releng.adoc
new file mode 100644
index 0000000000..31252c9a89
--- /dev/null
+++ b/docs/releng.adoc
@@ -0,0 +1,81 @@
+:showtitle:
+:numbered:
+:toc: left
+
+= Oxide Release Engineering
+
+Omicron is the Oxide control plane, and thus brings together all of the
+various components outside of this repo that make up the software on the
+product.
+This includes (but definitely isn't limited to):
+
+- https://github.com/oxidecomputer/propolis[Propolis], our hypervisor
+- https://github.com/oxidecomputer/helios[Helios], our host operating
+  system
+- https://github.com/oxidecomputer/crucible[Crucible], our block storage
+  service
+- https://github.com/oxidecomputer/maghemite[Maghemite], our switch
+  control software and routing protocol
+- https://github.com/oxidecomputer/hubris[Hubris], our embedded
+  microcontroller operating system used on the root of trust and service
+  processors
+- https://github.com/oxidecomputer/console[The web console]
+
+Each of these has its own build process that produces some sort of
+usable artifact, whether that is an illumos zone or a tarball of static
+assets.
+
+The release engineering process builds the control plane and combines
+it with the many external artifacts into a final artifact -- a Zip
+archive of a TUF repository -- that contains everything necessary for
+the product to operate. This process is run on each commit to ensure it
+is always functional. You can also run the process locally with
+`cargo xtask releng`.
+
+== Process overview
+
+`cargo xtask releng` performs all of these steps in parallel (with
+the temporary exception of artifact downloads handled by
+`tools/install_builder_prerequisites.sh`):
+
+. `tools/install_builder_prerequisites.sh` downloads several artifacts
+  (via the `tools/ci_*` scripts) that are necessary to build Omicron;
+  many of these are ultimately packaged by `omicron-package`. These
+  scripts are generally controlled by the `tools/*_version` and
+  `tools/*_checksums` files.
+. `cargo xtask releng` downloads the current root of trust and
+  service processor images built by the Hubris release engineering
+  process, which are signed via https://github.com/oxidecomputer/permission-slip[Permission Slip].
+  This is controlled by the `tools/permslip_production` and
+  `tools/permslip_staging` files.
+. `omicron-package` is the heart of the release engineering process; it
+  reads the manifest from `package-manifest.toml`, runs an appropriate
+  `cargo build` command, downloads any additional artifacts, and
+  packages them into a series of illumos zones and tarballs. (It can
+  also manage installation and uninstallation of these zones; see
+  how-to-run.adoc.)
+. Some of the illumos zones are distributed with the OS images (because
+  they rely on OS-specific APIs), and some are distributed
+  separately. `cargo xtask releng` unpacks the zones for the OS image
+  into a temporary directory that is overlaid onto the OS image in the
+  next step.
+. `helios-build` from the https://github.com/oxidecomputer/helios[Helios]
+  repository then builds two images: the *host* image, which is used
+  during normal operation, and the *trampoline* (or *recovery*) image,
+  which is used to update the host image.
+. Finally, `cargo xtask releng` generates a Zip archive of a
+  https://theupdateframework.io/[TUF] repository, which contains the
+  host and trampoline OS images, the ROT and SP images, and all the
+  illumos zones that are not installed into the OS images. This archive
+  can be uploaded to Wicket to perform an upgrade of the rack while the
+  control plane is not running.
+
+== Beyond `cargo xtask releng`
+
+Currently we use TUF repos generated in CI (by `cargo xtask releng`)
+directly. These repositories use a generated throwaway key to sign
+the TUF metadata, as sketched below.
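+
+The signing step, excerpted from `dev-tools/releng/src/tuf.rs` in this
+change, is just:
+
+[source,rust]
+----
+// A freshly generated Ed25519 key signs metadata that expires one week out.
+let keys = vec![Key::generate_ed25519()];
+let expiry = Utc::now().with_nanosecond(0).unwrap() + Duration::weeks(1);
+OmicronRepoAssembler::new(
+    &logger,
+    manifest,
+    keys,
+    expiry,
+    output_dir.join("repo.zip"),
+)
+.build()
+.await?;
+----
+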
+In the limit, we will have a process to sign release builds of these
+TUF repositories, which will be available as a Zip archive for an
+operator to upload to Nexus or Wicket, as well as an HTTPS repository
+for racks connected to the internet or with access to a proxy to
+perform automatic updates. The exact nature of the PKI and trust
+policies for each of these update flows is under discussion.
diff --git a/package-manifest.toml b/package-manifest.toml
index 5da7ed6867..825aeea8a8 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -592,7 +592,7 @@ only_for_targets.image = "standard"
 only_for_targets.switch = "asic"
 
 [package.pumpkind-gz]
-service_name = "pumpkind-gz"
+service_name = "pumpkind"
 source.type = "prebuilt"
 source.repo = "pumpkind"
 source.commit = "3fe9c306590fb2f28f54ace7fd18b3c126323683"
diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs
index 3b8bd24918..09fa7ab178 100644
--- a/package/src/bin/omicron-package.rs
+++ b/package/src/bin/omicron-package.rs
@@ -199,6 +199,25 @@ async fn do_dot(config: &Config) -> Result<()> {
     Ok(())
 }
 
+async fn do_list_outputs(
+    config: &Config,
+    output_directory: &Utf8Path,
+    intermediate: bool,
+) -> Result<()> {
+    for (name, package) in
+        config.package_config.packages_to_build(&config.target).0
+    {
+        if !intermediate
+            && package.output
+                == (PackageOutput::Zone { intermediate_only: true })
+        {
+            continue;
+        }
+        println!("{}", package.get_output_path(name, output_directory));
+    }
+    Ok(())
+}
+
 // The name reserved for the currently-in-use build target.
 const ACTIVE: &str = "active";
 
@@ -919,7 +938,7 @@ async fn main() -> Result<()> {
     tokio::fs::create_dir_all(&args.artifact_dir).await?;
     let logpath = args.artifact_dir.join("LOG");
     let logfile = std::io::LineWriter::new(open_options.open(&logpath)?);
-    println!("Logging to: {}", std::fs::canonicalize(logpath)?.display());
+    eprintln!("Logging to: {}", std::fs::canonicalize(logpath)?.display());
 
     let drain = slog_bunyan::new(logfile).build().fuse();
     let drain = slog_async::Async::new(drain).build().fuse();
@@ -981,6 +1000,10 @@ async fn main() -> Result<()> {
         SubCommand::Build(BuildCommand::Dot) => {
             do_dot(&get_config()?).await?;
         }
+        SubCommand::Build(BuildCommand::ListOutputs { intermediate }) => {
+            do_list_outputs(&get_config()?, &args.artifact_dir, *intermediate)
+                .await?;
+        }
         SubCommand::Build(BuildCommand::Package { disable_cache }) => {
             do_package(&get_config()?, &args.artifact_dir, *disable_cache)
                 .await?;
diff --git a/package/src/lib.rs b/package/src/lib.rs
index bba1a3a0cd..2b99cfbe07 100644
--- a/package/src/lib.rs
+++ b/package/src/lib.rs
@@ -90,6 +90,11 @@ pub enum BuildCommand {
     },
     /// Make a `dot` graph to visualize the package tree
     Dot,
+    /// List the output packages for the current target
+    ListOutputs {
+        #[clap(long)]
+        intermediate: bool,
+    },
     /// Builds the packages specified in a manifest, and places them into an
     /// 'out' directory.
     Package {
diff --git a/tools/build-host-image.sh b/tools/build-host-image.sh
deleted file mode 100755
index e90d800849..0000000000
--- a/tools/build-host-image.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/usr/bin/env bash
-
-set -o errexit
-set -o pipefail
-set -o xtrace
-
-function usage
-{
-    echo "usage: $0 [-fRB] HELIOS_PATH PACKAGES_TARBALL"
-    echo
-    echo "  -f   Force helios build despite git hash mismatch"
-    echo "  -R   Build recovery (trampoline) image"
-    echo "  -B   Build standard image"
-    exit 1
-}
-
-function main
-{
-    while getopts ":hfRBS:" opt; do
-        case $opt in
-            f)
-                FORCE=1
-                ;;
-            R)
-                BUILD_RECOVERY=1
-                HELIOS_BUILD_EXTRA_ARGS=-R
-                IMAGE_PREFIX=recovery
-                ;;
-            B)
-                BUILD_STANDARD=1
-                HELIOS_BUILD_EXTRA_ARGS=-B
-                IMAGE_PREFIX=ci
-                ;;
-            S)
-                SWITCH_ZONE=$OPTARG
-                ;;
-            h | \?)
-                usage
-                ;;
-        esac
-    done
-    shift $((OPTIND-1))
-
-    # Ensure we got either -R or -B but not both
-    case "x$BUILD_RECOVERY$BUILD_STANDARD" in
-        x11)
-            echo "specify at most one of -R, -B"
-            exit 1
-            ;;
-        x)
-            echo "must specify either -R or -B"
-            exit 1
-            ;;
-        *) ;;
-    esac
-
-    if [ "$#" != "2" ]; then
-        usage
-    fi
-    HELIOS_PATH=$1
-    GLOBAL_ZONE_TARBALL_PATH=$2
-
-    TOOLS_DIR="$(pwd)/$(dirname "$0")"
-
-    # Grab the opte version
-    OPTE_VER=$(cat "$TOOLS_DIR/opte_version")
-
-    # Assemble global zone files in a temporary directory.
-    if ! tmp_gz=$(mktemp -d); then
-        exit 1
-    fi
-    trap 'cd /; rm -rf "$tmp_gz"' EXIT
-
-    # Extract the global zone tarball into a tmp_gz directory
-    echo "Extracting gz packages into $tmp_gz"
-    ptime -m tar xvzf "$GLOBAL_ZONE_TARBALL_PATH" -C "$tmp_gz"
-
-    # If the user specified a switch zone (which is probably named
-    # `switch-SOME_VARIANT.tar.gz`), stage it in the right place and rename it
-    # to just `switch.tar.gz`.
-    if [ "x$SWITCH_ZONE" != "x" ]; then
-        mkdir -p "$tmp_gz/root/opt/oxide"
-        cp "$SWITCH_ZONE" "$tmp_gz/root/opt/oxide/switch.tar.gz"
-    fi
-
-    if [ "x$BUILD_STANDARD" != "x" ]; then
-        mkdir -p "$tmp_gz/root/root"
-        echo "# Add opteadm, ddmadm, oxlog to PATH" >> "$tmp_gz/root/root/.profile"
-        echo 'export PATH=$PATH:/opt/oxide/opte/bin:/opt/oxide/mg-ddm:/opt/oxide/oxlog' >> "$tmp_gz/root/root/.profile"
-    fi
-
-    # Move to the helios checkout
-    cd "$HELIOS_PATH"
-
-    HELIOS_REPO=https://pkg.oxide.computer/helios/2/dev/
-
-    # Build an image name that includes the omicron and host OS hashes
-    IMAGE_NAME="$IMAGE_PREFIX ${GITHUB_SHA:0:7}"
-    # The ${os_short_commit} token will be expanded by `helios-build`
-    IMAGE_NAME+='/${os_short_commit}'
-    IMAGE_NAME+=" $(date +'%Y-%m-%d %H:%M')"
-
-    ./helios-build experiment-image \
-        -p helios-dev="$HELIOS_REPO" \
-        -F optever="$OPTE_VER" \
-        -P "$tmp_gz/root" \
-        -N "$IMAGE_NAME" \
-        $HELIOS_BUILD_EXTRA_ARGS
-}
-
-main "$@"
diff --git a/tools/hubris_checksums b/tools/hubris_checksums
deleted file mode 100644
index 913cc460c4..0000000000
--- a/tools/hubris_checksums
+++ /dev/null
@@ -1,8 +0,0 @@
-4d38415a186fb1058c991d0e5ed44711457526e32687ff48ab6d6feadd8b4aa4 build-gimlet-c-image-default-v1.0.13.zip
-ead1988cfebb4f79c364a2207f0bda741b8dd0e4f02fb34b4d341c648ecaa733 build-gimlet-d-image-default-v1.0.13.zip
-85f5fc9c206c5fc61b4c2380b94a337220e944d67c0cb6bb2cb2486f8d5bc193 build-gimlet-e-image-default-v1.0.13.zip
-ac7d898369e94e33b3556a405352b24a1ee107ce877d416811d9e9fae1f1a1ec build-gimlet-f-image-default-v1.0.13.zip
-8cf812dc4aacc013335eb932d2bfaf8a542dec7bc29ea671d9a4235c12d61564 build-psc-b-image-default-v1.0.13.zip
-85622677eef52c6d210f44e82b2b6cdc5a8357e509744abe1693883b7635b38c build-psc-c-image-default-v1.0.13.zip
-87d6cd4add1aabe53756ba8f66a461cd3aa08f1a0093f94ea81a35a6a175ed21 build-sidecar-b-image-default-v1.0.13.zip
-d50d6f77da6fc736843b5418359532f18b7ffa090c2a3d68b5dc1d35281385f5 build-sidecar-c-image-default-v1.0.13.zip
diff --git a/tools/hubris_version b/tools/hubris_version
deleted file mode 100644
index 717d36cec2..0000000000
--- a/tools/hubris_version
+++ /dev/null
@@ -1 +0,0 @@
-TAGS=(gimlet-v1.0.13 psc-v1.0.13 sidecar-v1.0.13)
diff --git a/tools/permslip_commit b/tools/permslip_commit
deleted file mode 100644
index 58140df7da..0000000000
--- a/tools/permslip_commit
+++ /dev/null
@@ -1 +0,0 @@
-COMMIT=5d44e0065f90051a28881c75e3574142ada9b695
diff --git a/tufaceous-lib/src/assemble/manifest.rs b/tufaceous-lib/src/assemble/manifest.rs
index 8825327c1d..1c4a676f4c 100644
--- a/tufaceous-lib/src/assemble/manifest.rs
+++ b/tufaceous-lib/src/assemble/manifest.rs
@@ -524,6 +524,8 @@ impl DeserializedFileArtifactSource {
 pub enum DeserializedControlPlaneZoneSource {
     File {
         path: Utf8PathBuf,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        file_name: Option<String>,
     },
     Fake {
         name: String,
@@ -542,12 +544,15 @@ impl DeserializedControlPlaneZoneSource {
         F: FnOnce(&str, CompositeEntry<'_>) -> Result<T>,
     {
         let (name, data, mtime_source) = match self {
-            DeserializedControlPlaneZoneSource::File { path } => {
+            DeserializedControlPlaneZoneSource::File { path, file_name } => {
                 let data = std::fs::read(path)
                     .with_context(|| format!("failed to read {path}"))?;
-                let name = path.file_name().with_context(|| {
-                    format!("zone path missing file name: {path}")
-                })?;
+                let name = file_name
+                    .as_deref()
+                    .or_else(|| path.file_name())
+                    .with_context(|| {
+                        format!("zone path missing file name: {path}")
+                    })?;
                 // For now, always use the current time as the source. (Maybe
                 // change this to use the mtime on disk in the future?)
                 (name, data, MtimeSource::Now)
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml
index 43392665c7..998b45382e 100644
--- a/workspace-hack/Cargo.toml
+++ b/workspace-hack/Cargo.toml
@@ -46,6 +46,7 @@ either = { version = "1.11.0" }
 elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] }
 ff = { version = "0.13.0", default-features = false, features = ["alloc"] }
 flate2 = { version = "1.0.30" }
+fs-err = { version = "2.11.0", default-features = false, features = ["tokio"] }
 futures = { version = "0.3.30" }
 futures-channel = { version = "0.3.30", features = ["sink"] }
 futures-core = { version = "0.3.30" }
@@ -81,11 +82,8 @@ peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] }
 pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] }
 petgraph = { version = "0.6.5", features = ["serde-1"] }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
-ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
 predicates = { version = "3.1.0" }
 proc-macro2 = { version = "1.0.82" }
-rand = { version = "0.8.5" }
-rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] }
 regex = { version = "1.10.4" }
 regex-automata = { version = "0.4.5", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] }
 regex-syntax = { version = "0.8.2" }
@@ -121,7 +119,6 @@ uuid = { version = "1.8.0", features = ["serde", "v4"] }
 yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] }
 zerocopy = { version = "0.7.32", features = ["derive", "simd"] }
 zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] }
-zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] }
 
 [build-dependencies]
 ahash = { version = "0.8.8" }
@@ -153,6 +150,7 @@ either = { version = "1.11.0" }
 elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] }
 ff = { version = "0.13.0", default-features = false, features = ["alloc"] }
 flate2 = { version = "1.0.30" }
+fs-err = { version = "2.11.0", default-features = false, features = ["tokio"] }
 futures = { version = "0.3.30" }
 futures-channel = { version = "0.3.30", features = ["sink"] }
 futures-core = { version = "0.3.30" }
@@ -188,11 +186,8 @@ peg-runtime = { version = "0.8.3", default-features = false, features = ["std"] }
 pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] }
 petgraph = { version = "0.6.5", features = ["serde-1"] }
 postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] }
-ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] }
 predicates = { version = "3.1.0" }
 proc-macro2 = { version = "1.0.82" }
-rand = { version = "0.8.5" }
-rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] }
 regex = { version = "1.10.4" }
 regex-automata = { version = "0.4.5", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "unicode"] }
 regex-syntax = { version = "0.8.2" }
@@ -229,7 +224,6 @@ uuid = { version = "1.8.0", features = ["serde", "v4"] }
 yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] }
 zerocopy = { version = "0.7.32", features = ["derive", "simd"] }
 zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] }
-zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] }
"0.6.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.2", default-features = false, features = ["std"] }