From 36499de421fa61a0b1275b04f1a69773d822b175 Mon Sep 17 00:00:00 2001
From: Maru Newby
Date: Thu, 13 Jun 2024 19:18:57 +0200
Subject: [PATCH] [testing] Enable bootstrap tester e2e

---
 .github/workflows/ci.yml                 |   9 +
 go.mod                                   |  30 +-
 go.sum                                   |  60 ++-
 scripts/build_bootstrap_monitor.sh       |  13 +
 scripts/build_bootstrap_monitor_image.sh |  55 ++
 scripts/tests.e2e.bootstrap_tester.sh    |  70 +++
 tests/bootstrap/README.md                | 146 +++++
 tests/bootstrap/cmd/main.go              |   6 +
 tests/bootstrap/controller.go            | 639 ++++++++++++++++++++++
 tests/bootstrap/e2e/e2e_test.go          | 644 +++++++++++++++++++++++
 tests/bootstrap/monitor/Dockerfile       |  29 +
 tests/bootstrap/monitor/main.go          | 296 +++++++++++
 tests/fixture/tmpnet/node_process.go     |   6 +-
 tests/fixture/tmpnet/utils.go            |  12 +-
 14 files changed, 2006 insertions(+), 9 deletions(-)
 create mode 100755 scripts/build_bootstrap_monitor.sh
 create mode 100755 scripts/build_bootstrap_monitor_image.sh
 create mode 100755 scripts/tests.e2e.bootstrap_tester.sh
 create mode 100644 tests/bootstrap/README.md
 create mode 100644 tests/bootstrap/cmd/main.go
 create mode 100644 tests/bootstrap/controller.go
 create mode 100644 tests/bootstrap/e2e/e2e_test.go
 create mode 100644 tests/bootstrap/monitor/Dockerfile
 create mode 100644 tests/bootstrap/monitor/main.go

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a7c6145e2a61..77527b270948 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -240,3 +240,12 @@ jobs:
         run: bash -x scripts/tests.build_antithesis_images.sh
         env:
           TEST_SETUP: xsvm
+  test_build_bootstrap_monitor_image:
+    name: Build bootstrap monitor image
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-go-for-project
+      - name: Run bootstrap monitor e2e test
+        shell: bash
+        run: bash -x ./scripts/tests.e2e.bootstrap_tester.sh
diff --git a/go.mod b/go.mod
index f847fee1dd8f..b4d1c226bc44 100644
--- a/go.mod
+++ b/go.mod
@@ -22,7 +22,7 @@ require (
 	github.com/google/uuid v1.6.0
 	github.com/gorilla/mux v1.8.0
 	github.com/gorilla/rpc v1.2.0
-	github.com/gorilla/websocket v1.4.2
+	github.com/gorilla/websocket v1.5.0
 	github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
 	github.com/holiman/uint256 v1.2.4
 	github.com/huin/goupnp v1.3.0
@@ -69,6 +69,10 @@ require (
 	google.golang.org/protobuf v1.34.2
 	gopkg.in/natefinch/lumberjack.v2 v2.0.0
 	gopkg.in/yaml.v3 v3.0.1
+	// TODO(marun) Upgrade to more recent version once avalanchego is on golang 1.22
+	k8s.io/api v0.29.0
+	k8s.io/apimachinery v0.29.0
+	k8s.io/client-go v0.29.0
 )

 require (
@@ -96,6 +100,7 @@ require (
 	github.com/docker/go-connections v0.4.0 // indirect
 	github.com/docker/go-units v0.5.0 // indirect
 	github.com/dop251/goja v0.0.0-20230806174421-c933cf95e127 // indirect
+	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
 	github.com/ethereum/c-kzg-4844 v0.4.0 // indirect
 	github.com/fjl/memsize v0.0.0-20190710130421-bcb5799ab5e5 // indirect
 	github.com/frankban/quicktest v1.14.4 // indirect
@@ -106,11 +111,16 @@ require (
 	github.com/go-logr/logr v1.4.1 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
 	github.com/go-ole/go-ole v1.2.6 // indirect
+	github.com/go-openapi/jsonpointer v0.19.6 // indirect
+	github.com/go-openapi/jsonreference v0.20.2 // indirect
+	github.com/go-openapi/swag v0.22.3 // indirect
 	github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect
 	github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/golang/snappy
v0.0.5-0.20220116011046-fa5810519dcb // indirect + github.com/google/gnostic-models v0.6.8 // indirect + github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20230207041349-798e818bf904 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect github.com/hashicorp/go-bexpr v0.1.10 // indirect @@ -118,11 +128,15 @@ require ( github.com/hashicorp/hcl v1.0.0 // indirect github.com/holiman/billy v0.0.0-20230718173358-1c7e68d277a7 // indirect github.com/holiman/bloomfilter/v2 v2.0.3 // indirect + github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.15.15 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/magiconair/properties v1.8.6 // indirect + github.com/mailru/easyjson v0.7.7 // indirect github.com/mattn/go-colorable v0.1.13 // indirect github.com/mattn/go-isatty v0.0.17 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect @@ -130,6 +144,11 @@ require ( github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/mitchellh/pointerstructure v1.2.0 // indirect github.com/mmcloughlin/addchain v0.4.0 // indirect + github.com/moby/spdystream v0.2.0 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/olekukonko/tablewriter v0.0.5 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/pelletier/go-toml v1.9.5 // indirect @@ -157,11 +176,20 @@ require ( go.opentelemetry.io/otel/metric v1.22.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect go.uber.org/multierr v1.11.0 // indirect + golang.org/x/oauth2 v0.16.0 // indirect golang.org/x/sys v0.18.0 // indirect golang.org/x/text v0.14.0 // indirect golang.org/x/tools v0.17.0 // indirect + google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240123012728-ef4313101c80 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect + k8s.io/klog/v2 v2.110.1 // indirect + k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect + k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect rsc.io/tmplfunc v0.0.3 // indirect + sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + sigs.k8s.io/yaml v1.3.0 // indirect ) diff --git a/go.sum b/go.sum index 5881b9005f47..9a423a147045 100644 --- a/go.sum +++ b/go.sum @@ -62,6 +62,8 @@ github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax github.com/antithesishq/antithesis-sdk-go v0.3.8 h1:OvGoHxIcOXFJLyn9IJQ5DzByZ3YVAWNBc394ObzDRb8= github.com/antithesishq/antithesis-sdk-go v0.3.8/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/ava-labs/coreth 
v0.13.8-fixed-genesis-upgrade.0.20240813194342-7635a96aa180 h1:6aIHp7wbyGVYdhHVQUbG7BEcbCMEQ5SYopPPJyipyvk= github.com/ava-labs/coreth v0.13.8-fixed-genesis-upgrade.0.20240813194342-7635a96aa180/go.mod h1:/wNBVq7J7wlC2Kbov7kk6LV5xZvau7VF9zwTVOeyAjY= github.com/ava-labs/ledger-avalanche/go v0.0.0-20240610153809-9c955cc90a95 h1:dOVbtdnZL++pENdTCNZ1nu41eYDQkTML4sWebDnnq8c= @@ -177,6 +179,8 @@ github.com/dop251/goja_nodejs v0.0.0-20210225215109-d91c329300e7/go.mod h1:hn7BA github.com/dop251/goja_nodejs v0.0.0-20211022123610-8dd9abb0616d/go.mod h1:DngW8aVqWbuLRMHItjPUyqdj+HWPvnQe8V8y1nDpIbM= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= +github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= +github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -218,6 +222,7 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -225,6 +230,12 @@ github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= @@ -279,6 +290,8 @@ github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Z github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 
github.com/google/btree v1.1.2 h1:xf4v41cLI2Z6FxbKm+8Bu+m8ifhj15JuZ9sa0jZCMUU= github.com/google/btree v1.1.2/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= +github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= +github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -328,8 +341,9 @@ github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB7 github.com/gorilla/rpc v1.2.0 h1:WvvdC2lNeT1SP32zrIce5l0ECBfbAlmrmSBsuc57wfk= github.com/gorilla/rpc v1.2.0/go.mod h1:V4h9r+4sF5HnzqbwIez0fKSpANP0zlYd3qR7p36jkTQ= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= -github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= @@ -356,6 +370,8 @@ github.com/hydrogen18/memlistener v0.0.0-20200120041712-dcc25e7acd91/go.mod h1:q github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/ianlancetaylor/demangle v0.0.0-20220319035150-800ac71e25c2/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= +github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= +github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= @@ -370,9 +386,13 @@ github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7Bd github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod 
h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= @@ -409,6 +429,8 @@ github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/magiconair/properties v1.8.6 h1:5ibWZ6iY0NctNGWo87LalDlEZ6R41TqbbDamhfG/Qzo= github.com/magiconair/properties v1.8.6/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= @@ -442,13 +464,22 @@ github.com/mitchellh/pointerstructure v1.2.0/go.mod h1:BRAsLI5zgXmw97Lf6s25bs8oh github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY= github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU= github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU= +github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= +github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg= github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzEE/Zbp4w= github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w= @@ -554,6 +585,7 @@ github.com/status-im/keycard-go v0.2.0 
h1:QDLFswOQu1r5jsycloeQh3bVU8n/NatHHaZobt github.com/status-im/keycard-go v0.2.0/go.mod h1:wlp8ZLbsmrF6g6WjugPAx+IzoLrkdf9+mHxBEeo3Hbg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v0.0.0-20161117074351-18a02ba4a312/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.1.5-0.20170601210322-f6abca593680/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -565,6 +597,7 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/subosito/gotenv v1.3.0 h1:mjC+YW8QpAdXibNi+vNWgzmgBH4+5l5dCXv8cNysBLI= @@ -757,6 +790,8 @@ golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.16.0 h1:aDkGMBSYxElaoP81NpoUoz2oo2R2wHdZpGToUxfyQrQ= +golang.org/x/oauth2 v0.16.0/go.mod h1:hqZ+0LWXsiVoZpeld6jVt06P3adbS2Uu911W1SsJv2o= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -951,6 +986,8 @@ google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= @@ -1038,6 +1075,8 @@ gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= 
gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/ini.v1 v1.51.1/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= @@ -1049,6 +1088,7 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= @@ -1066,6 +1106,18 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +k8s.io/api v0.29.0 h1:NiCdQMY1QOp1H8lfRyeEf8eOwV6+0xA6XEE44ohDX2A= +k8s.io/api v0.29.0/go.mod h1:sdVmXoz2Bo/cb77Pxi71IPTSErEW32xa4aXwKH7gfBA= +k8s.io/apimachinery v0.29.0 h1:+ACVktwyicPz0oc6MTMLwa2Pw3ouLAfAon1wPLtG48o= +k8s.io/apimachinery v0.29.0/go.mod h1:eVBxQ/cwiJxH58eK/jd/vAk4mrxmVlnpBH5J2GbMeis= +k8s.io/client-go v0.29.0 h1:KmlDtFcrdUzOYrBhXHgKw5ycWzc3ryPX5mQe0SkG3y8= +k8s.io/client-go v0.29.0/go.mod h1:yLkXH4HKMAywcrD82KMSmfYg2DlE8mepPR4JGSo5n38= +k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= +k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= +k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= +k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54= launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= @@ -1073,3 +1125,9 @@ rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU= rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= +sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= +sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod 
h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= +sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= diff --git a/scripts/build_bootstrap_monitor.sh b/scripts/build_bootstrap_monitor.sh new file mode 100755 index 000000000000..0b061b5c7eae --- /dev/null +++ b/scripts/build_bootstrap_monitor.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Avalanchego root folder +AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) +# Load the constants +source "$AVALANCHE_PATH"/scripts/constants.sh + +echo "Building bootstrap-monitor..." +go build -ldflags "$static_ld_flags"\ + -o "$AVALANCHE_PATH/build/bootstrap-monitor"\ + "$AVALANCHE_PATH/tests/bootstrap/monitor/"*.go diff --git a/scripts/build_bootstrap_monitor_image.sh b/scripts/build_bootstrap_monitor_image.sh new file mode 100755 index 000000000000..2c92a1b5e9da --- /dev/null +++ b/scripts/build_bootstrap_monitor_image.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# e.g., +# ./scripts/build_bootstrap_monitor_image.sh # Build local image +# DOCKER_IMAGE=myavalanchego ./scripts/build_bootstrap_monitor_image.sh # Build local single arch image with a custom image name +# DOCKER_IMAGE=avaplatform/bootstrap-monitor ./scripts/build_bootstrap_monitor_image.sh # Build and push image to docker hub + +# Builds the image for the bootstrap monitor +# +# TODO(marun) Elaborate + +# Directory above this script +AVALANCHE_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )"; cd .. && pwd ) + +# Load the constants +source "$AVALANCHE_PATH"/scripts/constants.sh + +# The published name should be 'avaplatform/bootstrap-monitor', but to avoid unintentional +# pushes it is defaulted to 'bootstrap-monitor' (without a repo or registry name) which can +# only be used to create local images. +DOCKER_IMAGE=${DOCKER_IMAGE:-"bootstrap-monitor"} + +# buildx (BuildKit) improves the speed and UI of builds over the legacy builder. +# +# Reference: https://docs.docker.com/build/buildkit/ +DOCKER_CMD="docker buildx build" + +# The dockerfile doesn't specify the golang version to minimize the +# changes required to bump the version. Instead, the golang version is +# provided as an argument. +GO_VERSION="$(go list -m -f '{{.GoVersion}}')" +DOCKER_CMD="${DOCKER_CMD} --build-arg GO_VERSION=${GO_VERSION}" + +if [[ "${DOCKER_IMAGE}" == *"/"* ]]; then + # A slash in the image name indicates that a registry should be pushed to. 
+  DOCKER_CMD="${DOCKER_CMD} --push"
+
+  # A populated DOCKER_USERNAME env var triggers login
+  if [[ -n "${DOCKER_USERNAME:-}" ]]; then
+    echo "$DOCKER_PASS" | docker login --username "$DOCKER_USERNAME" --password-stdin
+  fi
+fi
+
+echo "Building Docker Image with tags: $DOCKER_IMAGE:$commit_hash, $DOCKER_IMAGE:$image_tag"
+${DOCKER_CMD} -t "$DOCKER_IMAGE:$commit_hash" -t "$DOCKER_IMAGE:$image_tag" \
+  "$AVALANCHE_PATH" -f "$AVALANCHE_PATH/tests/bootstrap/monitor/Dockerfile"
+
+
+# Only tag the latest image for the master branch when images are pushed to a registry
+if [[ "${DOCKER_IMAGE}" == *"/"* && $image_tag == "master" ]]; then
+  echo "Tagging current bootstrap-monitor images as $DOCKER_IMAGE:latest"
+  docker buildx imagetools create -t "$DOCKER_IMAGE:latest" "$DOCKER_IMAGE:$commit_hash"
+fi
diff --git a/scripts/tests.e2e.bootstrap_tester.sh b/scripts/tests.e2e.bootstrap_tester.sh
new file mode 100755
index 000000000000..f87a65596127
--- /dev/null
+++ b/scripts/tests.e2e.bootstrap_tester.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Run e2e tests for bootstrap tester.
+
+# TODO(marun) Rename this file
+
+if ! [[ "$0" =~ scripts/tests.e2e.bootstrap_tester.sh ]]; then
+  echo "must be run from repository root"
+  exit 255
+fi
+
+# Determine OS and ARCH in case installation is required for kubectl and kind
+# TODO(marun) Factor this out for reuse (or just switch to nix already)
+if which sw_vers &> /dev/null; then
+  OS="darwin"
+  ARCH="$(uname -m)"
+else
+  # Assume linux (windows is not supported)
+  OS="linux"
+  RAW_ARCH="$(uname -i)"
+  # Convert the linux arch string to the string used for k8s releases
+  if [[ "${RAW_ARCH}" == "aarch64" ]]; then
+    ARCH="arm64"
+  elif [[ "${RAW_ARCH}" == "x86_64" ]]; then
+    ARCH="amd64"
+  else
+    echo "Unsupported architecture: ${RAW_ARCH}"
+    exit 1
+  fi
+fi
+
+function ensure_command {
+  local cmd=$1
+  local install_uri=$2
+
+  if ! command -v "${cmd}" &> /dev/null; then
+    # Try to use a local version
+    local local_cmd="${PWD}/bin/${cmd}"
+    mkdir -p "${PWD}/bin"
+    if ! command -v "${local_cmd}" &> /dev/null; then
+      echo "${cmd} not found, attempting to install..."
+      curl -L -o "${local_cmd}" "${install_uri}"
+      # TODO(marun) Optionally validate the binary against published checksum
+      chmod +x "${local_cmd}"
+    fi
+  fi
+}
+
+# Ensure the kubectl command is available
+KUBECTL_VERSION=v1.30.2
+ensure_command kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/${OS}/${ARCH}/kubectl"
+
+# Ensure the kind command is available
+KIND_VERSION=v0.23.0
+ensure_command kind "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-${OS}-${ARCH}"
+
+# Ensure the kind-with-registry command is available
+ensure_command "kind-with-registry.sh" "https://raw.githubusercontent.com/kubernetes-sigs/kind/7cb9e6be25b48a0e248097eef29d496ab1a044d0/site/static/examples/kind-with-registry.sh"
+
+# Deploy a kind cluster with a local registry. Include the local bin in the
+# path to ensure locally installed kind and kubectl are available since the
+# script is expected to call them without a qualifying path.
+PATH="${PWD}/bin:$PATH" bash -x "${PWD}/bin/kind-with-registry.sh"
+
+# TODO(marun) Factor out ginkgo installation to avoid duplicating it across test scripts
+go install -v github.com/onsi/ginkgo/v2/ginkgo@v2.13.1
+
+PATH="${PWD}/bin:$PATH" ginkgo -vv ./tests/bootstrap/e2e
diff --git a/tests/bootstrap/README.md b/tests/bootstrap/README.md
new file mode 100644
index 000000000000..231c68d85a60
--- /dev/null
+++ b/tests/bootstrap/README.md
@@ -0,0 +1,146 @@
+# Bootstrap testing
+
+Bootstrapping an avalanchego node on a persistent network like mainnet
+or fuji requires that the version of avalanchego that the node is
+running be compatible with the historical data of that
+network. Running this test regularly is a good way of guarding against
+regressions in compatibility.
+
+
+### Full Sync
+
+### Pruning
+
+### State Sync
+
+## Architecture
+
+### Controller
+
+ - watches for deployments that manage bootstrap testing
+   - if such a deployment is using `latest` image tags
+     - get the version currently associated with `latest` (by running a pod)
+     - update the image tags for the pod, which will prompt a restart to use the new tag
+   - if such a deployment is using specific tags
+     - check its health
+       - require a service with the same name as the deployment
+     - use an informer with a resync every 5 minutes?
+       - i.e. only check health every 5 minutes
+     - if healthy, get the latest version (by running a pod)
+       - if the version is not different from the running version, do nothing
+       - if the version is different, update the tags for the deployment
+         - this will prompt a redeployment
+     - maybe cache the version to avoid running too many pods?
+       - actually, the check for a new version should be cheap. So every 5 minutes should be fine.
+
+### Monitor
+
+ - every [interval]
+   - if the node running on the localhost port is healthy
+     - get the image digest of the image that the node is running
+     - get the image digest of the avalanchego image currently tagged `latest`
+     - if the two digests differ
+       - trigger a new bootstrap test by updating the image for the deployment managing the pod
+
+ - every [interval]
+   - uses curl to check whether an avalanchego node running on localhost is healthy
+   - if the node is healthy
+     - use kubectl to wait for a pod using image avaplatform/avalanchego:latest to complete
+     - use kubectl to retrieve the image digest from the terminated pod
+     - compose the expected image name using the image digest i.e. `avaplatform/avalanchego:[image digest of terminated pod]`
+     - use kubectl to retrieve the image for the 'node' container in the same pod as the bash script
+     - if the image for the node container does not match the expected image name
+       - use kubectl to discover the name of the deployment that is managing the pod the script is running in
+       - update the 'node' container of the deployment to use the expected image name
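+
+A minimal sketch of this digest check in Go, using the same client-go API as
+the controller below. This is illustrative rather than the monitor's actual
+implementation: `getLatestImageID` is a hypothetical helper that would run a
+short-lived pod to resolve the digest of `latest`, much as `GetImageDetails`
+does in `controller.go`:
+
+```go
+package monitor
+
+import (
+	"context"
+	"fmt"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+)
+
+func checkForNewImage(ctx context.Context, clientset *kubernetes.Clientset, namespace string, deploymentName string) error {
+	deployments := clientset.AppsV1().Deployments(namespace)
+	deployment, err := deployments.Get(ctx, deploymentName, metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to get deployment: %w", err)
+	}
+
+	// The digest-qualified image the 'node' container is currently running
+	runningImage := deployment.Spec.Template.Spec.Containers[0].Image
+
+	// The digest currently associated with `latest` (hypothetical helper)
+	latestImage, err := getLatestImageID(ctx, clientset, namespace)
+	if err != nil {
+		return fmt.Errorf("failed to resolve latest image: %w", err)
+	}
+
+	// If the digests match, the in-progress test already covers `latest`
+	if runningImage == latestImage {
+		return nil
+	}
+
+	// Updating the container image prompts a redeployment, which starts a
+	// new bootstrap test against the new image
+	deployment.Spec.Template.Spec.Containers[0].Image = latestImage
+	_, err = deployments.Update(ctx, deployment, metav1.UpdateOptions{})
+	return err
+}
+```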
`avaplatform/avalanchego:[image digest of terminated pod]` + - use kubectl to retrieve the image for the 'node' container in the same pod as the bash script + - if the image for the node container does not match the expected image name + - use kubectl to discover the name of the deployment that is managing the pod the script is running in + - update the 'node' container of the deployment to use the expected image name + + +### Bootstrap pods + +### ArgoCD + +- will need to ignore differences to the image tags of the deployments + +#### Containers + + - init container + - uses avalanchego image + - mounts /data + - initializes the /data path by checking the version against one that was saved +```bash +version_path="/data/bootstrap_version.json" + +latest_version=$(/avalanchego/build/avalanchego --version-json) + +if [ -f "${version_path}" ] && diff <(echo "${latest_version}") "${version_path}"; then + echo "Resuming bootstrap for ${latest_version}" + exit 0 +fi + +echo "Starting bootstrap for ${latest_version}" + +echo "Recording version" +echo "${latest_version}" > "${version_path}" + +echo "Clearing Recording version" +rm -rf /data/node/* + +# Ensure the node path exists +mkdir /data/node +``` + - avalanche container + + +## Alternatives considered + +#### self-hosteed github workers + + - allow triggering / reporting to happen with github + - but 5 day limit on job duration wouldn't probably wouldn't support full-sync + +#### Adding a 'bootstrap mode' to avalanchego + - with a --bootstrap-mode flag, exit on successful bootstrap + - but using it without a controller would require using `latest` to + ensure that the node version could change on restarts + - but when using `latest` there is no way to avoid having pod + restart preventing the completion of an in-process bootstrap + test. Only by using a specific image tag will it be possible for + a restarted pod to reliably resume a bootstrap test. + + +### Primary Requirement + + - Run full sync and state sync bootstrap tests against mainnet and testnet + +### Secondary requiremnts + + - Run tests in infra-managed kubernetes + - Ensures sufficient resources (~2tb required per test) + - Ensures metrics and logs will be collected by Datadog + - Ensure that no more than one test will be run against a given image + - Ensure that an in-process bootstrap test can be resumed if a pod is restarted + +### TODO + - accepts bootstrap configurations + - network-id + - sync-mode + - kube configuration + - in-cluster or not + - start a wait loop (via kubernetes) + - check image version + - for each bootstrap config + - if config.running - + - continue + - if config.last_version != version + - start a new test run + - pass a + + +StartJob(clientset, namespace, imageName, pvcSize, flags) + - starts node + - periodically checks if node pod is running and node is healthy + - errors out on timeout + - log result (use zap) + - on healthy + - log result + - update last version + - update the config to ' diff --git a/tests/bootstrap/cmd/main.go b/tests/bootstrap/cmd/main.go new file mode 100644 index 000000000000..0313c629b0b5 --- /dev/null +++ b/tests/bootstrap/cmd/main.go @@ -0,0 +1,6 @@ +// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
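+
+The `StartJob` flow sketched above maps closely onto the helpers this patch
+adds in `tests/bootstrap/controller.go`. A rough composition in package
+`bootstrap` (alongside those helpers, so the existing imports apply); the
+timeout and polling interval are placeholders and error handling is
+abbreviated:
+
+```go
+func StartJob(ctx context.Context, clientset *kubernetes.Clientset, kubeConfig *restclient.Config, namespace string, imageName string, pvcSize string, flags map[string]string) error {
+	pod, err := StartNodePod(ctx, clientset, namespace, imageName, pvcSize, nil, nil, flags, nil)
+	if err != nil {
+		return fmt.Errorf("failed to start node: %w", err)
+	}
+	if err := WaitForPodStatus(ctx, clientset, namespace, pod.Name, PodIsRunning); err != nil {
+		return fmt.Errorf("failed to wait for pod to run: %w", err)
+	}
+	if _, err := WaitForPodIP(ctx, clientset, namespace, pod.Name); err != nil {
+		return fmt.Errorf("failed to wait for pod IP: %w", err)
+	}
+
+	// Forward a local port to the node's HTTP port so health can be polled
+	localPort, stopChan, err := EnableLocalForwardForPod(kubeConfig, namespace, pod.Name, config.DefaultHTTPPort, os.Stdout, os.Stderr)
+	if err != nil {
+		return fmt.Errorf("failed to enable local forward: %w", err)
+	}
+	defer close(stopChan)
+	uri := fmt.Sprintf("http://127.0.0.1:%d", localPort)
+
+	// Poll until the node reports healthy or the poll times out
+	return wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) {
+		healthReply, err := tmpnet.CheckNodeHealth(ctx, uri)
+		if err != nil {
+			return false, nil // Not yet reachable; keep polling
+		}
+		return healthReply.Healthy, nil
+	})
+}
+```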
diff --git a/tests/bootstrap/cmd/main.go b/tests/bootstrap/cmd/main.go
new file mode 100644
index 000000000000..0313c629b0b5
--- /dev/null
+++ b/tests/bootstrap/cmd/main.go
@@ -0,0 +1,6 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package main
+
+// Entrypoint for bootstrap controller
diff --git a/tests/bootstrap/controller.go b/tests/bootstrap/controller.go
new file mode 100644
index 000000000000..3d1bc53fc30a
--- /dev/null
+++ b/tests/bootstrap/controller.go
@@ -0,0 +1,639 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package bootstrap
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
+	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/client-go/tools/portforward"
+	"k8s.io/client-go/transport/spdy"
+
+	"github.com/ava-labs/avalanchego/config"
+	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
+	"github.com/ava-labs/avalanchego/utils/logging"
+	"github.com/ava-labs/avalanchego/version"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	restclient "k8s.io/client-go/rest"
+)
+
+const (
+	ContainerName = "avalanchego"
+
+	mountPath = "/data"
+
+	DefaultTimeout = 2 * time.Minute
+)
+
+type TestID struct {
+	NetworkID string
+	StateSync bool
+}
+
+type TestResult struct {
+	TestID
+
+	Image     ImageDetails
+	StartTime time.Time
+	EndTime   time.Time
+	PodName   string
+	Error     string
+}
+
+type TestConfig struct {
+	TestID
+
+	PVCSize string
+	Flags   map[string]string
+}
+
+type TestConfigStatus struct {
+	// Whether the test is currently running
+	IsRunning bool
+	// The avalanchego commit that is being tested
+	Commit string
+}
+
+type TestController struct {
+	TestConfigs []TestConfig
+	Namespace   string
+	ImageName   string
+	Status      map[TestID]TestConfigStatus
+
+	// TODO(marun) Rename to KubeClientset
+	Clientset  *kubernetes.Clientset
+	KubeConfig *restclient.Config
+
+	statusLock sync.RWMutex
+}
+
+func (c *TestController) GetStatus(testID TestID) (TestConfigStatus, bool) {
+	c.statusLock.RLock()
+	defer c.statusLock.RUnlock()
+	status, ok := c.Status[testID]
+	return status, ok
+}
+
+func (c *TestController) SetStatus(testID TestID, status TestConfigStatus) {
+	c.statusLock.Lock()
+	defer c.statusLock.Unlock()
+
+	c.Status[testID] = status
+
+	// TODO(marun) Write to configmap to allow restart
+}
+
+type ImageDetails struct {
+	// The identifier used to start the image
+	ImageID string
+	// The versions reported by the image's avalanchego binary
+	Versions version.Versions
+}
+
+func NewTestController(configs []TestConfig, namespace string, imageName string) (*TestController, error) {
+	// Initialize the clientset from the kubeconfig
+
+	// TODO(marun) Use InCluster?
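+	// An illustrative answer to the TODO above (a sketch, not part of this
+	// patch): prefer the in-cluster config when the controller runs as a
+	// pod and fall back to the kubeconfig otherwise, e.g.:
+	//
+	//   kubeConfig, err := restclient.InClusterConfig()
+	//   if errors.Is(err, restclient.ErrNotInCluster) {
+	//       kubeConfig, err = clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG"))
+	//   }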
+ + kubeconfigPath := os.Getenv("KUBECONFIG") + kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath) + if err != nil { + return nil, fmt.Errorf("failed to build kubeconfig: %w", err) + } + clientset, err := kubernetes.NewForConfig(kubeConfig) + if err != nil { + return nil, fmt.Errorf("failed to create kube clientset: %w", err) + } + + return &TestController{ + TestConfigs: configs, + Namespace: namespace, + ImageName: imageName, + Status: map[TestID]TestConfigStatus{}, + Clientset: clientset, + KubeConfig: kubeConfig, + }, nil +} + +func (c *TestController) Run() error { + // TODO(marun) Support detecting running jobs (reuse ) + // - read pods in the namespace with labels indicating running jobs + + return wait.PollImmediateInfinite(time.Minute, func() (bool, error) { + ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) + defer cancel() + + imageDetails, err := GetImageDetails(ctx, c.Clientset, c.Namespace, c.ImageName) + if err != nil { + log.Printf("failed to get image versions: %v", err) + return false, nil + } + + for _, cfg := range c.TestConfigs { + status, ok := c.GetStatus(cfg.TestID) + if !ok || (!status.IsRunning && status.Commit != imageDetails.Versions.Commit) { + c.SetStatus(cfg.TestID, TestConfigStatus{ + IsRunning: true, + Commit: status.Commit, + }) + go c.RunJob(cfg, *imageDetails) + } + } + return false, nil + }) +} + +func (c *TestController) RunJob(cfg TestConfig, imageDetails ImageDetails) { + result := TestResult{ + TestID: cfg.TestID, + StartTime: time.Now(), + Image: imageDetails, + } + + // On error, log result, add the test result to the set of results (configmap) and mark job as not running + // On success, log result, add the test result to the set of results (configmap) and mark job as not running + podName, err := c.TestBootstrap(cfg, imageDetails) + result.EndTime = time.Now() + result.PodName = podName + status := TestConfigStatus{ + IsRunning: false, + } + if err != nil { + result.Error = err.Error() + } else { + status.Commit = imageDetails.Versions.Commit + } + + c.SetStatus(cfg.TestID, status) + + // TODO(marun) Stop the pod + + // TODO(marun) Write result to a configmap + + log.Printf("Test run complete: %v", result) +} + +func (c *TestController) TestBootstrap(cfg TestConfig, imageDetails ImageDetails) (string, error) { + log.Printf("Starting bootstrap test for network %s, state sync %t, commit %s", cfg.NetworkID, cfg.StateSync, imageDetails.Versions.Commit) + + flags := map[string]string{ + config.NetworkNameKey: cfg.NetworkID, + config.HealthCheckFreqKey: "500ms", // Ensure rapid detection of a healthy state + // TODO(marun) This only needs to be set for test purposes? 
+		config.PublicIPKey:        "127.0.0.1", // Ensure only ipv4 is used to ensure compatibility with client-go's port-forwarding facility
+		config.LogDisplayLevelKey: logging.Info.String(),
+		config.LogLevelKey:        logging.Off.String(),
+	}
+	for k, v := range cfg.Flags {
+		flags[k] = v
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
+	defer cancel()
+
+	podLabels := map[string]string{
+		"network":    cfg.NetworkID,
+		"state-sync": strconv.FormatBool(cfg.StateSync),
+		"commit":     imageDetails.Versions.Commit,
+	}
+
+	// TODO(marun) Use the image id from imageDetails to bootstrap with
+
+	nodePod, err := StartNodePod(
+		ctx,
+		c.Clientset,
+		c.Namespace,
+		c.ImageName,
+		cfg.PVCSize,
+		GetDataDogAnnotations(cfg),
+		podLabels,
+		flags,
+		nil,
+	)
+	if err != nil {
+		wrappedErr := fmt.Errorf("failed to start node pod: %w", err)
+		// Resources are left in place for debugging
+		return "", wrappedErr
+	}
+
+	err = WaitForPodStatus(
+		ctx,
+		c.Clientset,
+		c.Namespace,
+		nodePod.Name,
+		PodIsRunning,
+	)
+	if err != nil {
+		wrappedErr := fmt.Errorf("failed to wait for pod running: %w", err)
+		// Log enough details to fix the problem. Don't delete.
+		return nodePod.Name, wrappedErr
+	}
+
+	log.Printf("Bootstrap test running for network %s, sync mode %s, commit %s", cfg.NetworkID, strconv.FormatBool(cfg.StateSync), imageDetails.Versions.Commit)
+
+	// The pod IP needs to be set before enabling the local forward
+	// TODO(marun) Move to EnableLocalForwardForPod
+	_, err = WaitForPodIP(ctx, c.Clientset, c.Namespace, nodePod.Name)
+	if err != nil {
+		wrappedErr := fmt.Errorf("failed to wait for pod IP: %w", err)
+		return nodePod.Name, wrappedErr
+	}
+
+	// TODO(marun) Use the pod port directly when this controller is running as a pod
+	localPort, localPortStopChan, err := EnableLocalForwardForPod(
+		c.KubeConfig,
+		c.Namespace,
+		nodePod.Name,
+		config.DefaultHTTPPort,
+		os.Stdout,
+		os.Stderr,
+	)
+	if err != nil {
+		// TODO(marun) Stop the pod?
+		return nodePod.Name, fmt.Errorf("failed to enable local forward: %w", err)
+	}
+	defer close(localPortStopChan)
+
+	localNodeURI := fmt.Sprintf("http://127.0.0.1:%d", localPort)
+
+	log.Printf("Waiting for node to finish bootstrapping for network %s, sync mode %s, commit %s", cfg.NetworkID, strconv.FormatBool(cfg.StateSync), imageDetails.Versions.Commit)
+
+	// TODO(marun) Make these adjustable to simplify testing
+	err = wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) {
+		// TODO(marun) Check if the pod is running
+
+		// TODO(marun) Create constant for this timeout?
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+
+		log.Printf("Checking if node is healthy for network %s, sync mode %s, commit %s", cfg.NetworkID, strconv.FormatBool(cfg.StateSync), imageDetails.Versions.Commit)
+
+		healthReply, err := tmpnet.CheckNodeHealth(ctx, localNodeURI)
+		if err != nil {
+			log.Printf("Error checking node health: %v", err)
+			return false, nil
+		}
+
+		return healthReply.Healthy, nil
+	})
+
+	return nodePod.Name, err
+}
+
+func GetImageDetails(
+	ctx context.Context,
+	clientset *kubernetes.Clientset,
+	namespace string,
+	imageName string,
+) (*ImageDetails, error) {
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "avalanchego-version-check-",
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:    ContainerName,
+					Command: []string{"./avalanchego"},
+					Args:    []string{"--version-json"},
+					Image:   imageName,
+				},
+			},
+			RestartPolicy: corev1.RestartPolicyNever,
+		},
+	}
+	createdPod, err := clientset.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
+	if err != nil {
+		return nil, err
+	}
+
+	err = WaitForPodStatus(ctx, clientset, namespace, createdPod.Name, PodHasTerminated)
+	if err != nil {
+		return nil, err
+	}
+
+	terminatedPod, err := clientset.CoreV1().Pods(namespace).Get(ctx, createdPod.Name, metav1.GetOptions{})
+	if err != nil {
+		return nil, err
+	}
+
+	// Get the image id for the avalanchego image
+	imageID := ""
+	for _, status := range terminatedPod.Status.ContainerStatuses {
+		if status.Name == ContainerName {
+			imageID = status.ImageID
+			break
+		}
+	}
+	if len(imageID) == 0 {
+		return nil, fmt.Errorf("failed to get image id for pod %s.%s", namespace, createdPod.Name)
+	}
+	imageIDParts := strings.Split(imageID, ":")
+	if len(imageIDParts) != 2 {
+		return nil, fmt.Errorf("unexpected image id format: %s", imageID)
+	}
+
+	// Request the logs
+	req := clientset.CoreV1().Pods(namespace).GetLogs(createdPod.Name, &corev1.PodLogOptions{
+		Container: ContainerName,
+	})
+
+	// Stream the logs
+	readCloser, err := req.Stream(ctx)
+	if err != nil {
+		return nil, err
+	}
+	defer readCloser.Close()
+
+	// Unmarshal the logs into the versions type
+	bytes, err := io.ReadAll(readCloser)
+	if err != nil {
+		return nil, err
+	}
+	versions := version.Versions{}
+	err = json.Unmarshal(bytes, &versions)
+	if err != nil {
+		return nil, err
+	}
+
+	// Only delete the pod on success so that failures can be debugged
+	err = clientset.CoreV1().Pods(namespace).Delete(ctx, createdPod.Name, metav1.DeleteOptions{})
+	if err != nil {
+		return nil, err
+	}
+
+	return &ImageDetails{
+		ImageID:  imageIDParts[1],
+		Versions: versions,
+	}, nil
+}
+
+func WaitForPodStatus(
+	ctx context.Context,
+	clientset *kubernetes.Clientset,
+	namespace string,
+	name string,
+	acceptable func(*corev1.PodStatus) bool,
+) error {
+	watch, err := clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.SingleObject(metav1.ObjectMeta{Name: name}))
+	if err != nil {
+		return fmt.Errorf("failed to initiate watch of pod %s/%s: %w", namespace, name, err)
+	}
+
+	for {
+		select {
+		case event := <-watch.ResultChan():
+			pod, ok := event.Object.(*corev1.Pod)
+			if !ok {
+				continue
+			}
+
+			if acceptable(&pod.Status) {
+				return nil
+			}
+		case <-ctx.Done():
+			return errors.New("timeout waiting for pod readiness")
+		}
+	}
+}
+
+func PodIsRunning(status *corev1.PodStatus) bool {
+	if status.Phase != corev1.PodRunning {
+		return false
+	}
+
+	for _, containerStatus := range status.ContainerStatuses {
+		if !containerStatus.Ready {
return false + } + } + return true +} + +func PodHasTerminated(status *corev1.PodStatus) bool { + return status.Phase == corev1.PodSucceeded || status.Phase == corev1.PodFailed +} + +type NodePod struct { + Namespace string + Name string + PVCName string +} + +func GetDataDogAnnotations(cfg TestConfig) map[string]string { + tags := fmt.Sprintf(`["bootstrap-network:%s", "bootstrap-sync-mode:%s", "is_subnet:no"]`, cfg.NetworkID, strconv.FormatBool(cfg.StateSync)) + // TODO(marun) Make the container name configurable + return map[string]string{ + "ad.datadoghq.com/avalanchego.check_names": `["openmetrics"]`, + "ad.datadoghq.com/avalanchego.init_configs": "[{}]", + "ad.datadoghq.com/tolerate-unready": "true", + "ad.datadoghq.com/avalanchego.instances": `[{"metrics": ["*"], "namespace": "avalanchego", "prometheus_url": "http://%%host%%:%%port_0%%/ext/metrics", "max_returned_metrics": 64000, "send_monotonic_counter": "false"}]`, + "ad.datadoghq.com/tags": tags, + "ad.datadoghq.com/avalanchego.logs": fmt.Sprintf(`[{"source": "avalanchego", "service": "bootstrap-tester","tags": %s}]`, tags), + } +} + +func StartNodePod( + ctx context.Context, + clientset *kubernetes.Clientset, + namespace string, + imageName string, + pvcSize string, + podAnnotations map[string]string, + podLabels map[string]string, + flags map[string]string, + podMutator func(*corev1.Pod), +) (*NodePod, error) { + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "avalanche-pvc-", + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(pvcSize), + }, + }, + }, + } + createdPVC, err := clientset.CoreV1().PersistentVolumeClaims(namespace).Create(ctx, pvc, metav1.CreateOptions{}) + if err != nil { + return nil, err + } + + log.Printf("Created PVC %s.%s", namespace, createdPVC.Name) + + // Ensure the pvc mount path matches the data dir for the node + flags[config.DataDirKey] = mountPath + + // TODO(marun) Ensure images aren't pulled for testing + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "avalanche-node-", + Annotations: podAnnotations, + Labels: podLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: ContainerName, + Image: imageName, + VolumeMounts: []corev1.VolumeMount{ + { + Name: "data", + MountPath: mountPath, + }, + }, + Env: StringMapToEnvVarSlice(flags), + }, + }, + RestartPolicy: corev1.RestartPolicyNever, + Volumes: []corev1.Volume{ + { + Name: "data", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: createdPVC.Name, + }, + }, + }, + }, + }, + } + if podMutator != nil { + podMutator(pod) + } + + createdPod, err := clientset.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{}) + if err != nil { + return nil, err + } + + log.Printf("Created Pod %s.%s", namespace, createdPod.Name) + + return &NodePod{ + Namespace: namespace, + Name: createdPod.Name, + PVCName: createdPVC.Name, + }, nil +} + +func StringMapToEnvVarSlice(mapping map[string]string) []corev1.EnvVar { + envVars := make([]corev1.EnvVar, len(mapping)) + var i int + for k, v := range mapping { + envVars[i] = corev1.EnvVar{ + Name: envVarName(config.EnvPrefix, k), + Value: v, + } + i++ + } + return envVars +} + +func envVarName(prefix string, key string) string { + // e.g. 
MY_PREFIX, network-id -> MY_PREFIX_NETWORK_ID
+	return strings.ToUpper(prefix + "_" + config.DashesToUnderscores.Replace(key))
+}
+
+// EnableLocalForwardForPod enables traffic forwarding from a local
+// port to the specified pod with client-go. The returned stop channel
+// should be closed to stop the port forwarding.
+func EnableLocalForwardForPod(kubeConfig *restclient.Config, namespace string, name string, port int, out, errOut io.Writer) (uint16, chan struct{}, error) {
+	log.Printf("Forwarding traffic from a local port to port %d of pod %s.%s via the Kube API", port, namespace, name)
+
+	transport, upgrader, err := spdy.RoundTripperFor(kubeConfig)
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to create round tripper: %w", err)
+	}
+
+	dialer := spdy.NewDialer(
+		upgrader,
+		&http.Client{
+			Transport: transport,
+		},
+		http.MethodPost,
+		&url.URL{
+			Scheme: "https",
+			Path:   fmt.Sprintf("/api/v1/namespaces/%s/pods/%s/portforward", namespace, name),
+			Host:   strings.TrimPrefix(kubeConfig.Host, "https://"),
+		},
+	)
+	ports := []string{fmt.Sprintf("0:%d", port)}
+
+	// Need to specify 127.0.0.1 to ensure that forwarding is only
+	// attempted for the ipv4 address of the pod. By default, kind is
+	// deployed with only ipv4, and attempting to connect to a pod
+	// with ipv6 will fail.
+	// TODO(marun) This should no longer be required
+	addresses := []string{"127.0.0.1"}
+
+	stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1)
+	forwarder, err := portforward.NewOnAddresses(dialer, addresses, ports, stopChan, readyChan, out, errOut)
+	if err != nil {
+		return 0, nil, fmt.Errorf("failed to create forwarder: %w", err)
+	}
+
+	go func() {
+		if err := forwarder.ForwardPorts(); err != nil {
+			// TODO(marun) Need better error handling here? Or is ok for test-only usage?
+			panic(err)
+		}
+	}()
+
+	<-readyChan // Wait for port forwarding to be ready
+
+	// Retrieve the dynamically allocated local port
+	forwardedPorts, err := forwarder.GetPorts()
+	if err != nil {
+		close(stopChan)
+		return 0, nil, fmt.Errorf("failed to get forwarded ports: %w", err)
+	}
+	if len(forwardedPorts) == 0 {
+		close(stopChan)
+		return 0, nil, errors.New("failed to find at least one forwarded port")
+	}
+	return forwardedPorts[0].Local, stopChan, nil
+}
+
+func WaitForPodIP(ctx context.Context, clientset kubernetes.Interface, namespace string, name string) (string, error) {
+	watch, err := clientset.CoreV1().Pods(namespace).Watch(ctx, metav1.SingleObject(metav1.ObjectMeta{Name: name}))
+	if err != nil {
+		return "", fmt.Errorf("failed to watch pod: %w", err)
+	}
+	for {
+		select {
+		case <-ctx.Done():
+			return "", fmt.Errorf("failed to wait for pod IP before timeout: %w", ctx.Err())
+		case event := <-watch.ResultChan():
+			pod, ok := event.Object.(*corev1.Pod)
+			if !ok {
+				return "", fmt.Errorf("unexpected type: %T", event.Object)
+			}
+			if pod.Status.PodIP != "" {
+				return pod.Status.PodIP, nil
+			}
+		}
+	}
+}
diff --git a/tests/bootstrap/e2e/e2e_test.go b/tests/bootstrap/e2e/e2e_test.go
new file mode 100644
index 000000000000..ea173cab8f13
--- /dev/null
+++ b/tests/bootstrap/e2e/e2e_test.go
@@ -0,0 +1,644 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+ +package e2e + +import ( + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "runtime/debug" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/utils/pointer" + + "github.com/ava-labs/avalanchego/api/info" + "github.com/ava-labs/avalanchego/config" + "github.com/ava-labs/avalanchego/ids" + "github.com/ava-labs/avalanchego/tests" + "github.com/ava-labs/avalanchego/tests/bootstrap" + "github.com/ava-labs/avalanchego/tests/fixture/e2e" + "github.com/ava-labs/avalanchego/tests/fixture/tmpnet" + "github.com/ava-labs/avalanchego/utils/constants" + "github.com/ava-labs/avalanchego/utils/logging" + + ginkgo "github.com/onsi/ginkgo/v2" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +const ( + imageCurrent = false + monitorImageCurrent = false + containerName = "avalanchego" + pvcSize = "128Mi" + initContainerName = "init" +) + +var ( + bootstrapIP string + bootstrapNodeID ids.NodeID +) + +func TestE2E(t *testing.T) { + ginkgo.RunSpecs(t, "bootstrap test suite") +} + +var _ = ginkgo.BeforeSuite(func() { + tc := e2e.NewTestContext() + require := require.New(tc) + + // TODO(marun) Support configuring the registry via a flag + imageName := "localhost:5001/avalanchego:latest" + bootstrapImageName := "localhost:5001/bootstrap-monitor:latest" + + relativePath := "tests/bootstrap/e2e" + // Need the repo root to determine the build dockerfile paths + repoRoot, err := getRepoRootPath(relativePath) + require.NoError(err) + + // Get the Go version from build info to for use in building the image + buildInfo, ok := debug.ReadBuildInfo() + require.True(ok, "Couldn't read build info") + goVersion := strings.TrimPrefix(buildInfo.GoVersion, "go") + + // Build the avalanchego image + if imageCurrent { + tc.Outf("{{yellow}}avalanchego image is up-to-date. skipping image build{{/}}\n") + } else { + ginkgo.By("Building the avalanchego image") + require.NoError(buildDockerImage( + tc.DefaultContext(), + goVersion, + repoRoot, + filepath.Join(repoRoot, "Dockerfile"), + imageName, + )) + + // TODO(marun) Figure out how to use the image without a local registry + ginkgo.By("Pushing avalanchego image to the local registry") + require.NoError(runCommand(exec.CommandContext(tc.DefaultContext(), "docker", "push", imageName))) + } + + if monitorImageCurrent { + tc.Outf("{{yellow}}bootstrap-monitor image is up-to-date. 
+	} else {
+		ginkgo.By("Building the bootstrap-monitor image")
+		require.NoError(buildDockerImage(
+			tc.DefaultContext(),
+			goVersion,
+			repoRoot,
+			filepath.Join(repoRoot, "tests", "bootstrap", "monitor", "Dockerfile"),
+			bootstrapImageName,
+		))
+
+		// TODO(marun) Figure out how to use the image without a local registry
+		ginkgo.By("Pushing bootstrap-monitor image to the local registry")
+		require.NoError(runCommand(exec.CommandContext(tc.DefaultContext(), "docker", "push", bootstrapImageName)))
+	}
+
+	ginkgo.By("Configuring a kubernetes client")
+	kubeconfigPath := os.Getenv("KUBECONFIG")
+	kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
+	require.NoError(err)
+	clientset, err := kubernetes.NewForConfig(kubeConfig)
+	require.NoError(err)
+
+	// TODO(marun) Consider optionally deleting namespaces
+
+	ginkgo.By("Creating a kube namespace to ensure isolation between test runs")
+	namespaceSpec := &corev1.Namespace{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "bootstrap-test-e2e-",
+		},
+	}
+	createdNamespace, err := clientset.CoreV1().Namespaces().Create(tc.DefaultContext(), namespaceSpec, metav1.CreateOptions{})
+	require.NoError(err)
+	namespace := createdNamespace.Name
+
+	flags := map[string]string{
+		config.NetworkNameKey:            constants.LocalName,
+		config.SybilProtectionEnabledKey: "false",
+		config.HealthCheckFreqKey:        "500ms",     // Ensure rapid detection of a healthy state
+		config.PublicIPKey:               "127.0.0.1", // Use only ipv4 to ensure compatibility with client-go's port-forwarding facility
+		config.LogDisplayLevelKey:        logging.Off.String(), // Display logging not needed since nodes run headless
+		config.LogLevelKey:               logging.Debug.String(),
+	}
+
+	// TODO(marun) Create a unique name for the node pod
+	ginkgo.By("Creating a pod for a single-node network")
+	networkPod, err := bootstrap.StartNodePod(
+		tc.DefaultContext(),
+		clientset,
+		namespace,
+		imageName,
+		pvcSize,
+		nil,
+		nil,
+		flags,
+		nil,
+	)
+	require.NoError(err)
+	require.NoError(bootstrap.WaitForPodStatus(
+		tc.DefaultContext(),
+		clientset,
+		namespace,
+		networkPod.Name,
+		bootstrap.PodIsRunning,
+	))
+
+	bootstrapIP, err = bootstrap.WaitForPodIP(tc.DefaultContext(), clientset, namespace, networkPod.Name)
+	require.NoError(err)
+
+	localPort, localPortStopChan, err := bootstrap.EnableLocalForwardForPod(kubeConfig, namespace, networkPod.Name, config.DefaultHTTPPort, ginkgo.GinkgoWriter, ginkgo.GinkgoWriter)
+	require.NoError(err)
+	ginkgo.DeferCleanup(func() {
+		close(localPortStopChan)
+	})
+
+	localNodeURI := fmt.Sprintf("http://127.0.0.1:%d", localPort)
+
+	infoClient := info.NewClient(localNodeURI)
+	bootstrapNodeID, _, err = infoClient.GetNodeID(tc.DefaultContext())
+	require.NoError(err)
+
+	ginkgo.By("Waiting for the pod to report a healthy status at " + localNodeURI)
+	require.Eventually(func() bool {
+		healthReply, err := tmpnet.CheckNodeHealth(tc.DefaultContext(), localNodeURI)
+		if err != nil {
+			tc.Outf("Error checking node health: %v\n", err)
+			return false
+		}
+		return healthReply.Healthy
+	}, e2e.DefaultTimeout, e2e.DefaultPollingInterval)
+
+	ginkgo.By("Creating a bootstrap tester StatefulSet configured to test against the single-node network")
+	statefulSet := createBootstrapTester(tc, clientset, namespace, bootstrapIP, bootstrapNodeID, pvcSize, imageName, bootstrapImageName)
+
+	ginkgo.By("Waiting for the init container to report the start of the bootstrap test")
+	var podName string
+	var podUID types.UID
+	require.Eventually(func() bool {
+		pods, err := clientset.CoreV1().Pods(namespace).List(tc.DefaultContext(), metav1.ListOptions{})
+		if err != nil {
+			tc.Outf("Error listing pods: %v\n", err)
+			return false
+		}
+		var targetPod corev1.Pod
+		for _, pod := range pods.Items {
+			if len(pod.ObjectMeta.OwnerReferences) > 0 && pod.ObjectMeta.OwnerReferences[0].Name == statefulSet.Name {
+				targetPod = pod
+				break
+			}
+		}
+		if len(targetPod.Name) == 0 {
+			return false
+		}
+		podName = targetPod.Name
+		podUID = targetPod.UID
+
+		// Check the container status
+		for _, containerStatus := range targetPod.Status.InitContainerStatuses {
+			if containerStatus.Name == initContainerName {
+				if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode == 0 {
+					return true
+				}
+				break
+			}
+		}
+		return false
+	}, e2e.DefaultTimeout, e2e.DefaultPollingInterval)
+
+	// Request the logs
+	req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
+		Container: initContainerName,
+	})
+
+	// Stream the logs
+	readCloser, err := req.Stream(tc.DefaultContext())
+	require.NoError(err)
+	defer readCloser.Close()
+
+	// Read the logs
+	logBytes, err := io.ReadAll(readCloser)
+	require.NoError(err)
+
+	// TODO(marun) Use a constant for the expected log message
+	require.Contains(string(logBytes), "Starting bootstrap test")
+
+	// Wait for the monitor container to report that bootstrap is complete
+	ginkgo.By("Waiting for the monitor container to report the success of the bootstrap test")
+	require.Eventually(func() bool {
+		pod, err := clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), podName, metav1.GetOptions{})
+		if err != nil {
+			tc.Outf("Error getting pod: %v\n", err)
+			return false
+		}
+		if pod.Status.Phase != corev1.PodRunning {
+			return false
+		}
+
+		req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
+			Container: "monitor",
+		})
+
+		// Stream the logs
+		readCloser, err := req.Stream(tc.DefaultContext())
+		if err != nil {
+			tc.Outf("Error streaming logs: %v\n", err)
+			return false
+		}
+		defer readCloser.Close()
+
+		// Read the logs
+		logBytes, err := io.ReadAll(readCloser)
+		if err != nil {
+			tc.Outf("Error reading logs: %v\n", err)
+			return false
+		}
+
+		// TODO(marun) Use a constant for the expected log message
+		return strings.Contains(string(logBytes), "Bootstrap completed successfully")
+	}, e2e.DefaultTimeout, e2e.DefaultPollingInterval)
+
+	ginkgo.By("Deleting the pod to ensure that its replacement will resume bootstrapping with the existing image")
+	require.NoError(clientset.CoreV1().Pods(namespace).Delete(tc.DefaultContext(), podName, metav1.DeleteOptions{}))
+	var secondPodUID types.UID
+	require.Eventually(func() bool {
+		pod, err := clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), podName, metav1.GetOptions{})
+		if apierrors.IsNotFound(err) {
+			// Pod has yet to be recreated
+			return false
+		} else if err != nil {
+			tc.Outf("Error getting pod: %v\n", err)
+			return false
+		}
+		if pod.UID == podUID {
+			// The pod hasn't been replaced yet
+			return false
+		}
+		secondPodUID = pod.UID
+		// Check the container status
+		for _, containerStatus := range pod.Status.InitContainerStatuses {
+			if containerStatus.Name == initContainerName {
+				if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode == 0 {
+					return true
+				}
+				break
+			}
+		}
+		return false
+	}, e2e.DefaultTimeout, e2e.DefaultPollingInterval)
+
+	// Request the logs
+	req = clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
+		Container: initContainerName,
+	})
+
+	// Stream the logs
+	readCloser, err = req.Stream(tc.DefaultContext())
+	require.NoError(err)
+	defer readCloser.Close()
+	logBytes, err = io.ReadAll(readCloser)
+	require.NoError(err)
+	// TODO(marun) Use a constant for the expected log message
+	require.Contains(string(logBytes), "Resuming bootstrap test")
+
+	// TODO(marun) Factor out the image build and push
+	ginkgo.By("Building and pushing a new avalanchego image to prompt the start of a new bootstrap test")
+	require.NoError(buildDockerImage(
+		tc.DefaultContext(),
+		goVersion,
+		repoRoot,
+		filepath.Join(repoRoot, "Dockerfile"),
+		imageName,
+	))
+
+	// TODO(marun) Figure out how to use the image without a local registry
+	require.NoError(runCommand(exec.CommandContext(tc.DefaultContext(), "docker", "push", imageName)))
+
+	ginkgo.By("Waiting for a new pod to be created")
+	require.Eventually(func() bool {
+		pod, err := clientset.CoreV1().Pods(namespace).Get(tc.DefaultContext(), podName, metav1.GetOptions{})
+		if apierrors.IsNotFound(err) {
+			// Pod has yet to be recreated
+			return false
+		} else if err != nil {
+			tc.Outf("Error getting pod: %v\n", err)
+			return false
+		}
+		if pod.UID == secondPodUID {
+			// The pod hasn't been replaced yet
+			return false
+		}
+		// Check the container status
+		for _, containerStatus := range pod.Status.InitContainerStatuses {
+			if containerStatus.Name == initContainerName {
+				if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode == 0 {
+					return true
+				}
+				break
+			}
+		}
+		return false
+	}, e2e.DefaultTimeout, e2e.DefaultPollingInterval)
+
+	// TODO(marun) Factor out the check for the start of a bootstrap test
+	ginkgo.By("Waiting for the init container to report the start of a new bootstrap test")
+
+	// Request the logs
+	req = clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
+		Container: initContainerName,
+	})
+
+	// Stream the logs
+	readCloser, err = req.Stream(tc.DefaultContext())
+	require.NoError(err)
+	defer readCloser.Close()
+
+	// Read the logs
+	logBytes, err = io.ReadAll(readCloser)
+	require.NoError(err)
+
+	// TODO(marun) Use a constant for the expected log message
+	require.Contains(string(logBytes), "Starting bootstrap test")
+})
+
+var _ = ginkgo.Describe("[Bootstrap Tester]", func() {
+	ginkgo.It("should enable testing node bootstrap", func() {})
+})
+
+// func waitForBootstrapComplete(tc tests.TestContext, clientset kubernetes.Interface, deployment *appsv1.Deployment) {
+// 	require := require.New(tc)
+
+// 	// Wait for the deployment to complete
+// 	require.NoError(bootstrap.WaitForDeploymentComplete(tc.DefaultContext(), clientset, deployment))
+
+// 	// Check logs for 'starting' and 'success' messages and check that the bootstrapping node is actually reporting healthy
+// }
+
+func createBootstrapTester(tc tests.TestContext, clientset kubernetes.Interface, namespace string, bootstrapIP string, bootstrapNodeID ids.NodeID, pvcSize string, nodeImage string, monitorImage string) *appsv1.StatefulSet {
+	require := require.New(tc)
+
+	role := &rbacv1.Role{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "bootstrap-tester-role-",
+		},
+		Rules: []rbacv1.PolicyRule{
+			{
+				APIGroups: []string{""},
+				Resources: []string{"pods"},
+				Verbs:     []string{"get", "create", "watch", "delete"},
+			},
+			{
+				APIGroups: []string{"apps"},
+				Resources: []string{"statefulsets"},
+				Verbs:     []string{"get", "update"},
+			},
+		},
+	}
+	createdRole, err := clientset.RbacV1().Roles(namespace).Create(tc.DefaultContext(), role, metav1.CreateOptions{})
+	require.NoError(err)
+
+	roleBinding := &rbacv1.RoleBinding{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "bootstrap-tester-role-binding-",
+		},
+		Subjects: []rbacv1.Subject{
+			{
+				Kind:      "ServiceAccount",
+				Name:      "default",
+				Namespace: namespace,
+			},
+		},
+		RoleRef: rbacv1.RoleRef{
+			Kind:     "Role",
+			Name:     createdRole.Name,
+			APIGroup: "rbac.authorization.k8s.io",
+		},
+	}
+	_, err = clientset.RbacV1().RoleBindings(namespace).Create(tc.DefaultContext(), roleBinding, metav1.CreateOptions{})
+	require.NoError(err)
+
+	// pvc := &corev1.PersistentVolumeClaim{
+	// 	ObjectMeta: metav1.ObjectMeta{
+	// 		GenerateName: "avalanche-pvc-",
+	// 	},
+	// 	Spec: corev1.PersistentVolumeClaimSpec{
+	// 		AccessModes: []corev1.PersistentVolumeAccessMode{
+	// 			corev1.ReadWriteOncePod,
+	// 		},
+	// 		Resources: corev1.VolumeResourceRequirements{
+	// 			Requests: corev1.ResourceList{
+	// 				corev1.ResourceStorage: resource.MustParse(pvcSize),
+	// 			},
+	// 		},
+	// 	},
+	// }
+	// createdPVC, err := clientset.CoreV1().PersistentVolumeClaims(namespace).Create(tc.DefaultContext(), pvc, metav1.CreateOptions{})
+	// require.NoError(err)
+
+	// Ensure the pvc mount path matches the data dir for the node
+	dataPath := "/data"
+	flags := map[string]string{
+		config.DataDirKey:                dataPath + "/node", // Use a subdirectory to allow the use of RemoveAll when starting a new bootstrap
+		config.BootstrapIPsKey:           bootstrapIP + ":9651",
+		config.BootstrapIDsKey:           bootstrapNodeID.String(),
+		config.NetworkNameKey:            constants.LocalName,
+		config.PublicIPKey:               "127.0.0.1", // Use only ipv4 to ensure compatibility with client-go's port-forwarding facility
+		config.HealthCheckFreqKey:        "500ms",     // Ensure rapid detection of a healthy state
+		config.SybilProtectionEnabledKey: "false",
+	}
+
+	monitorEnv := []corev1.EnvVar{
+		{
+			Name: "POD_NAME",
+			ValueFrom: &corev1.EnvVarSource{
+				FieldRef: &corev1.ObjectFieldSelector{
+					FieldPath: "metadata.name",
+				},
+			},
+		},
+		{
+			Name: "POD_NAMESPACE",
+			ValueFrom: &corev1.EnvVarSource{
+				FieldRef: &corev1.ObjectFieldSelector{
+					FieldPath: "metadata.namespace",
+				},
+			},
+		},
+	}
+
+	statefulSet := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "bootstrap-tester",
+		},
+		Spec: appsv1.StatefulSetSpec{
+			Replicas:    pointer.Int32(1),
+			ServiceName: "bootstrap-tester",
+			Selector: &metav1.LabelSelector{
+				MatchLabels: map[string]string{
+					"app": "bootstrap-tester",
+				},
+			},
+			VolumeClaimTemplates: []corev1.PersistentVolumeClaim{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "node-data",
+					},
+					Spec: corev1.PersistentVolumeClaimSpec{
+						AccessModes: []corev1.PersistentVolumeAccessMode{
+							corev1.ReadWriteOnce,
+						},
+						Resources: corev1.VolumeResourceRequirements{
+							Requests: corev1.ResourceList{
+								corev1.ResourceStorage: resource.MustParse(pvcSize),
+							},
+						},
+					},
+				},
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app": "bootstrap-tester",
+					},
+				},
+				Spec: corev1.PodSpec{
+					InitContainers: []corev1.Container{
+						{
+							Name:  initContainerName,
+							Image: monitorImage,
+							Args: []string{
+								"init",
+								// TODO(marun) Enable data path to be configured
+							},
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "node-data",
+									MountPath: dataPath,
+								},
+							},
+							Env: monitorEnv,
+						},
+					},
+					Containers: []corev1.Container{
+						{
+							Name:  "node",
+							Image: nodeImage,
+							Ports: []corev1.ContainerPort{
+								{
+									Name:          "http",
+									ContainerPort: 9650,
+								},
+								{
+									Name:          "staker",
+									ContainerPort: 9651,
+								},
+							},
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "node-data",
+									MountPath: dataPath,
+								},
+							},
+							Env: bootstrap.StringMapToEnvVarSlice(flags),
+						},
+						{
+							Name:  "monitor",
+							Image: monitorImage,
+							Args: []string{
+								"wait-for-completion",
+								// TODO(marun) Enable data path to be configured
+							},
+							Env: monitorEnv,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	createdStatefulSet, err := clientset.AppsV1().StatefulSets(namespace).Create(tc.DefaultContext(), statefulSet, metav1.CreateOptions{})
+	require.NoError(err)
+	return createdStatefulSet
+}
+
+// Image build and push are implemented by calling the docker CLI instead of using the docker SDK to
+// avoid bumping avalanchego runtime dependencies for the sake of an e2e test.
+//
+// TODO(marun) Update to use the docker SDK if/when reasonable
+
+func buildDockerImage(ctx context.Context, goVersion string, buildPath string, dockerfilePath string, imageName string) error {
+	// #nosec G204
+	cmd := exec.CommandContext(
+		ctx,
+		"docker",
+		"build",
+		"-t", imageName,
+		"-f", dockerfilePath,
+		"--build-arg", "GO_VERSION="+goVersion,
+		buildPath,
+	)
+
+	return runCommand(cmd)
+}
+
+// runCommand runs the provided command and captures output to stdout and stderr so if an
+// error occurs the output can be included to provide sufficient detail for troubleshooting.
+func runCommand(cmd *exec.Cmd) error {
+	output, err := cmd.CombinedOutput()
+	if err != nil {
+		return fmt.Errorf("command failed: %w\noutput:\n%s", err, output)
+	}
+	return nil
+}
+
+func getRepoRootPath(suffix string) (string, error) {
+	// - When executed via a test binary, the working directory will be wherever
+	// the binary is executed from, but scripts should require execution from
+	// the repo root.
+	//
+	// - When executed via ginkgo (nicer for development + supports
+	// parallel execution) the working directory will always be the
+	// target path (e.g. [repo root]/tests/bootstrap/e2e) and getting the repo
+	// root will require stripping the target path suffix.
+	cwd, err := os.Getwd()
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSuffix(cwd, suffix), nil
+}
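+
+// TODO(marun) The repeated 'stream the init container logs and assert on a
+// message' steps above could be factored into a helper along these lines
+// (sketch only, not yet wired in):
+//
+//	func initLogContains(tc tests.TestContext, clientset kubernetes.Interface, namespace string, podName string, message string) bool {
+//		req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{Container: initContainerName})
+//		readCloser, err := req.Stream(tc.DefaultContext())
+//		if err != nil {
+//			return false
+//		}
+//		defer readCloser.Close()
+//		logBytes, err := io.ReadAll(readCloser)
+//		return err == nil && strings.Contains(string(logBytes), message)
+//	}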
diff --git a/tests/bootstrap/monitor/Dockerfile b/tests/bootstrap/monitor/Dockerfile
new file mode 100644
index 000000000000..adbaa3f89e68
--- /dev/null
+++ b/tests/bootstrap/monitor/Dockerfile
@@ -0,0 +1,29 @@
+# The version is supplied as a build argument rather than hard-coded
+# to minimize the cost of version changes.
+ARG GO_VERSION
+
+# ============= Compilation Stage ================
+FROM golang:$GO_VERSION-bullseye AS builder
+
+WORKDIR /build
+
+# Copy the code into the container
+COPY . .
+
+# Download avalanchego dependencies using go mod
+RUN go mod download
+
+# Build the bootstrap-monitor binary
+ARG RACE_FLAG=""
+RUN ./scripts/build_bootstrap_monitor.sh
+
+# ============= Cleanup Stage ================
+FROM debian:11-slim AS execution
+
+WORKDIR /avalanchego
+
+# Copy the executable into the container
+COPY --from=builder /build/build/bootstrap-monitor .
+
+ENTRYPOINT [ "./bootstrap-monitor" ]
diff --git a/tests/bootstrap/monitor/main.go b/tests/bootstrap/monitor/main.go
new file mode 100644
index 000000000000..5aab48e18c76
--- /dev/null
+++ b/tests/bootstrap/monitor/main.go
@@ -0,0 +1,296 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+	"time"
+
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+
+	"github.com/ava-labs/avalanchego/tests/bootstrap"
+	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
+	"github.com/ava-labs/avalanchego/utils/perms"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// TODO(marun) Make this configurable
+const (
+	monitorInterval   = 5 * time.Second
+	nodeContainerName = "node"
+)
+
+func main() {
+	args := os.Args
+	if len(args) < 2 {
+		log.Println("Usage: monitor [init | wait-for-completion]")
+		return
+	}
+	namespace := os.Getenv("POD_NAMESPACE")
+	podName := os.Getenv("POD_NAME")
+
+	switch args[1] {
+	case "init":
+		initBootstrapTest(namespace, podName)
+	case "wait-for-completion":
+		waitForCompletion(namespace, podName)
+	default:
+		log.Println("Usage: monitor [init | wait-for-completion]")
+		return
+	}
+}
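+
+// The monitor is intended to run as part of the bootstrap-tester pod (as
+// configured by the e2e suite): `init` runs in an init container to decide
+// whether a new bootstrap test should be started, and `wait-for-completion`
+// runs in a container alongside the node to report the result and trigger
+// the next test when a new node image is available.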
+
+// TODO(marun) Make this more resilient?
+func initBootstrapTest(namespace string, podName string) {
+	// Retrieve the image id used for the pod
+	clientset := getClientset()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+	defer cancel()
+
+	log.Printf("Retrieving pod %s/%s for the image ID of its node container", namespace, podName)
+	pod, err := clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
+	if err != nil {
+		log.Printf("failed to get pod: %v", err)
+		return
+	}
+	var image string
+	for _, container := range pod.Spec.Containers {
+		if container.Name == nodeContainerName {
+			image = container.Image
+			break
+		}
+	}
+	if len(image) == 0 {
+		log.Printf("failed to get image for pod %s.%s", namespace, podName)
+		return
+	}
+	log.Printf("Image for node container: %s", image)
+
+	imagePath := "/data/bootstrap_image.txt"
+
+	startNewBootstrap := false
+
+	var savedImage string
+	if _, err := os.Stat(imagePath); err != nil && os.IsNotExist(err) {
+		log.Println("Version file not found")
+		startNewBootstrap = true
+	} else if err != nil {
+		log.Printf("failed to check if version file exists: %v", err)
+		return
+	} else {
+		savedImageBytes, err := os.ReadFile(imagePath)
+		if err != nil {
+			log.Printf("failed to read version file: %v", err)
+			return
+		}
+		savedImage = string(savedImageBytes)
+		log.Printf("Image ID from version file: %s", savedImage)
+	}
+
+	if image != savedImage {
+		startNewBootstrap = true
+	}
+
+	if startNewBootstrap {
+		log.Println("Clearing /data/node directory")
+		if err := os.RemoveAll("/data/node"); err != nil {
+			log.Printf("failed to remove node directory: %v", err)
+			return
+		}
+
+		log.Printf("Writing version file %s with latest image ID", imagePath)
+		if err := os.WriteFile(imagePath, []byte(image), perms.ReadWrite); err != nil {
+			log.Printf("failed to write version file: %v", err)
+			return
+		}
+
+		log.Printf("Starting bootstrap test for image %q", image)
+	} else {
+		log.Println("Resuming bootstrap test")
+	}
+}
+
+func getClientset() *kubernetes.Clientset {
+	kubeconfigPath := os.Getenv("KUBECONFIG")
+	kubeConfig, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
+	if err != nil {
+		log.Fatalf("failed to build kubeconfig: %v", err)
+	}
+	clientset, err := kubernetes.NewForConfig(kubeConfig)
+	if err != nil {
+		log.Fatalf("failed to create clientset: %v", err)
+	}
+	return clientset
+}
+
+func waitForCompletion(namespace string, podName string) {
+	ticker := time.NewTicker(monitorInterval)
+	defer ticker.Stop()
+
+	clientset := getClientset()
+
+	var statefulSetName string
+	completed := false
+	for {
+		// TODO(marun) Maybe move this to the end?
+		<-ticker.C
+
+		// Scope the context (and its deferred cancel) to the iteration so
+		// that deferred cancels don't accumulate for the life of the loop.
+		done := func() bool {
+			ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+			defer cancel()
+
+			healthy, err := tmpnet.CheckNodeHealth(ctx, "http://localhost:9650")
+			if err != nil {
+				log.Printf("failed to wait for node health: %v", err)
+				return false
+			}
+			if !healthy.Healthy {
+				return false
+			}
+			if !completed {
+				log.Println("Bootstrap completed successfully")
+				completed = true
+			}
+
+			if len(statefulSetName) == 0 {
+				statefulSetName, err = getStatefulSetName(ctx, clientset, namespace, podName)
+				if err != nil {
+					log.Printf("failed to get StatefulSet name: %v", err)
+					return false
+				}
+			}
+			statefulSet, err := clientset.AppsV1().StatefulSets(namespace).Get(ctx, statefulSetName, metav1.GetOptions{})
+			if err != nil {
+				log.Printf("failed to get statefulset: %v", err)
+				return false
+			}
+			var nodeContainer corev1.Container
+			for _, container := range statefulSet.Spec.Template.Spec.Containers {
+				if container.Name == nodeContainerName {
+					nodeContainer = container
+					break
+				}
+			}
+			if len(nodeContainer.Name) == 0 {
+				log.Println("failed to find the node container in the statefulset")
+				return false
+			}
+			latestImageID, err := getLatestImageID(ctx, clientset, namespace, nodeContainer.Image)
+			if err != nil {
+				log.Printf("failed to get latest image id: %v", err)
+				return false
+			}
+			if nodeContainer.Image == latestImageID {
+				log.Printf("Latest image %s has already bootstrapped successfully", latestImageID)
+				return false
+			}
+
+			log.Printf("Found new image to test: %s", latestImageID)
+
+			// Update the statefulset to use the latest image id
+			statefulSet.Spec.Template.Spec.Containers[0].Image = latestImageID
+			if _, err = clientset.AppsV1().StatefulSets(namespace).Update(ctx, statefulSet, metav1.UpdateOptions{}); err != nil {
+				log.Printf("failed to update statefulset: %v", err)
+				return false
+			}
+			log.Printf("Updated statefulset to target latest image: %s", latestImageID)
+			return true
+		}()
+		if done {
+			break
+		}
+	}
+
+	// Expect the pod to be terminated within 5 minutes in preparation for being rescheduled
+	time.Sleep(5 * time.Minute)
+}
+
+func getStatefulSetName(ctx context.Context, clientset *kubernetes.Clientset, namespace string, podName string) (string, error) {
+	pod, err := clientset.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{})
+	if err != nil {
+		return "", fmt.Errorf("failed to get pod: %w", err)
+	}
+	if len(pod.OwnerReferences) == 0 {
+		return "", errors.New("pod has no owner references")
+	}
+	return pod.OwnerReferences[0].Name, nil
+}
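+
+// getLatestImageID resolves the latest image ID for the image name used by
+// the node container. Examples of the name parsing performed below (the
+// registry address shown is the local registry assumed by the e2e suite):
+//
+//	localhost:5001/avalanchego@sha256:...  -> localhost:5001/avalanchego
+//	localhost:5001/avalanchego:latest      -> localhost:5001/avalanchego
+//	avalanchego:latest                     -> avalanchego
+//	avalanchego                            -> avalanchego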
+func getLatestImageID(
+	ctx context.Context,
+	clientset *kubernetes.Clientset,
+	namespace string,
+	imageName string,
+) (string, error) {
+	// TODO(marun) Parse the image name more robustly?
+	var baseImageName string
+	if strings.Contains(imageName, "@") {
+		baseImageName = strings.Split(imageName, "@")[0]
+	} else {
+		imageNameParts := strings.Split(imageName, ":")
+		switch len(imageNameParts) {
+		case 1:
+			baseImageName = imageName
+		case 2:
+			baseImageName = imageNameParts[0]
+			log.Printf("Derived image name of %q from %q", baseImageName, imageName)
+		case 3:
+			// A 3-part name (e.g. registry:port/name:tag) includes a registry port
+			baseImageName = strings.Join(imageNameParts[0:2], ":")
+		default:
+			return "", fmt.Errorf("unexpected image name format: %q", imageName)
+		}
+	}
+
+	pod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "avalanchego-version-check-",
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:    nodeContainerName,
+					Command: []string{"./avalanchego"},
+					Args:    []string{"--version"},
+					Image:   baseImageName + ":latest",
+				},
+			},
+			RestartPolicy: corev1.RestartPolicyNever,
+		},
+	}
+	createdPod, err := clientset.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
+	if err != nil {
+		return "", err
+	}
+
+	err = bootstrap.WaitForPodStatus(ctx, clientset, namespace, createdPod.Name, bootstrap.PodHasTerminated)
+	if err != nil {
+		return "", fmt.Errorf("failed to wait for pod termination: %w", err)
+	}
+
+	terminatedPod, err := clientset.CoreV1().Pods(namespace).Get(ctx, createdPod.Name, metav1.GetOptions{})
+	if err != nil {
+		return "", fmt.Errorf("failed to load terminated pod: %w", err)
+	}
+
+	// Get the image id for the avalanchego image
+	imageID := ""
+	for _, status := range terminatedPod.Status.ContainerStatuses {
+		if status.Name == nodeContainerName {
+			imageID = status.ImageID
+			break
+		}
+	}
+	if len(imageID) == 0 {
+		return "", fmt.Errorf("failed to get image id for pod %s.%s", namespace, createdPod.Name)
+	}
+
+	// Only delete the pod on success so that failures are left in place to aid in debugging
+	err = clientset.CoreV1().Pods(namespace).Delete(ctx, createdPod.Name, metav1.DeleteOptions{})
+	if err != nil {
+		return "", err
+	}
+
+	return imageID, nil
+}
diff --git a/tests/fixture/tmpnet/node_process.go b/tests/fixture/tmpnet/node_process.go
index 40ce055c3bbe..7074601922f2 100644
--- a/tests/fixture/tmpnet/node_process.go
+++ b/tests/fixture/tmpnet/node_process.go
@@ -176,7 +176,11 @@ func (p *NodeProcess) IsHealthy(ctx context.Context) (bool, error) {
 		return false, errNotRunning
 	}
 
-	return CheckNodeHealth(ctx, p.node.URI)
+	healthReply, err := CheckNodeHealth(ctx, p.node.URI)
+	if err != nil {
+		return false, err
+	}
+	return healthReply.Healthy, nil
 }
 
 func (p *NodeProcess) getProcessContextPath() string {
diff --git a/tests/fixture/tmpnet/utils.go b/tests/fixture/tmpnet/utils.go
index 71aa6e61a47d..f2613f462208 100644
--- a/tests/fixture/tmpnet/utils.go
+++ b/tests/fixture/tmpnet/utils.go
@@ -20,27 +20,27 @@ const (
 	DefaultNodeTickerInterval = 50 * time.Millisecond
 )
 
-func CheckNodeHealth(ctx context.Context, uri string) (bool, error) {
+func CheckNodeHealth(ctx context.Context, uri string) (*health.APIReply, error) {
 	// Check that the node is reporting healthy
-	health, err := health.NewClient(uri).Health(ctx, nil)
+	healthReply, err := health.NewClient(uri).Health(ctx, nil)
 	if err == nil {
-		return health.Healthy, nil
+		return healthReply, nil
 	}
 	switch t := err.(type) {
 	case *net.OpError:
 		if t.Op == "read" {
-			// Connection refused - potentially recoverable
-			return false, nil
+			// Connection refused - potentially recoverable. Return a non-nil
+			// reply so that callers don't need a nil check before reading Healthy.
+			return &health.APIReply{Healthy: false}, nil
 		}
 	case syscall.Errno:
 		if t == syscall.ECONNREFUSED {
-			// Connection refused - potentially recoverable
-			return false, nil
+			// Connection refused - potentially recoverable. Return a non-nil
+			// reply so that callers don't need a nil check before reading Healthy.
+			return &health.APIReply{Healthy: false}, nil
 		}
 	}
 	// Assume all other errors are not recoverable
-	return false, fmt.Errorf("failed to query node health: %w", err)
+	return nil, fmt.Errorf("failed to query node health: %w", err)
 }
 
 // WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed.