From 260ab204b5c9ae37a09a9c3a267a7c53b40caa3a Mon Sep 17 00:00:00 2001
From: Maru Newby
Date: Mon, 1 Jul 2024 08:15:49 +0200
Subject: [PATCH] [testing] Add a job to test state sync bootstrap of testnet

---
 .github/actionlint.yml                     |   1 +
 .github/actions/check-bootstrap/action.yml |  48 ++++++++
 .../check-bootstrap-testnet-state-sync.yml |  26 +++++
 tests/bootstrap/main.go                    | 106 ++++++++++++++++++
 tests/fixture/tmpnet/defaults.go           |  13 ++-
 tests/fixture/tmpnet/network.go            |   6 +-
 tests/fixture/tmpnet/node.go               |  10 +-
 tests/fixture/tmpnet/utils.go              |   9 +-
 8 files changed, 211 insertions(+), 8 deletions(-)
 create mode 100644 .github/actions/check-bootstrap/action.yml
 create mode 100644 .github/workflows/check-bootstrap-testnet-state-sync.yml
 create mode 100644 tests/bootstrap/main.go

diff --git a/.github/actionlint.yml b/.github/actionlint.yml
index 2e6d753c2282..2d3f1a35ef04 100644
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -3,3 +3,4 @@ self-hosted-runner:
     - custom-arm64-focal
     - custom-arm64-jammy
     - net-outage-sim
+    - avalanche-avalanchego
diff --git a/.github/actions/check-bootstrap/action.yml b/.github/actions/check-bootstrap/action.yml
new file mode 100644
index 000000000000..3a4376989e74
--- /dev/null
+++ b/.github/actions/check-bootstrap/action.yml
@@ -0,0 +1,48 @@
+name: 'Check bootstrap for a network and state sync configuration'
+description: 'Checks that bootstrap is possible for the given network and state sync configuration'
+
+inputs:
+  network_id:
+    required: true
+  state_sync_enabled:
+    required: true
+  prometheus_id:
+    required: true
+  prometheus_password:
+    required: true
+  loki_id:
+    required: true
+  loki_password:
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Setup Go
+      uses: ./.github/actions/setup-go-for-project
+
+    - name: Build AvalancheGo Binary
+      shell: bash
+      run: ./scripts/build.sh -r
+
+    - name: Check avalanchego version
+      shell: bash
+      run: ./build/avalanchego --version
+
+    - name: Run bootstrap check for the configured network
+      uses: ./.github/actions/run-monitored-tmpnet-cmd
+      with:
+        run: go run ./tests/bootstrap --avalanchego-path=./build/avalanchego --network-id=${{ inputs.network_id }} --state-sync-enabled=${{ inputs.state_sync_enabled }}
+        prometheus_id: ${{ inputs.prometheus_id }}
+        prometheus_password: ${{ inputs.prometheus_password }}
+        loki_id: ${{ inputs.loki_id }}
+        loki_password: ${{ inputs.loki_password }}
+
+    # Skip creation of an artifact in favor of log collection to loki
+
+    - name: Check size of tmpnet path
+      if: always()
+      shell: bash
+      run: |
+        echo "Checking tmpnet disk usage:"
+        du -sh ~/.tmpnet
diff --git a/.github/workflows/check-bootstrap-testnet-state-sync.yml b/.github/workflows/check-bootstrap-testnet-state-sync.yml
new file mode 100644
index 000000000000..802f64c53629
--- /dev/null
+++ b/.github/workflows/check-bootstrap-testnet-state-sync.yml
@@ -0,0 +1,26 @@
+name: 'Check Bootstrap (testnet,state-sync)'
+
+on:
+  # TODO(marun) Add a schedule
+  workflow_dispatch:
+
+  # TODO(marun) For testing only - remove before merge
+  pull_request:
+
+jobs:
+  check_bootstrap_testnet_state_sync:
+    name: Check Bootstrap (testnet,state-sync)
+    runs-on: avalanche-avalanchego
+    timeout-minutes: 4320 # 3 days
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+      - name: Check bootstrap (testnet,state-sync)
+        uses: ./.github/actions/check-bootstrap
+        with:
+          network_id: 5 # testnet
+          state_sync_enabled: true
+          prometheus_id: ${{ secrets.PROMETHEUS_ID || '' }}
+          prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
+          loki_id: ${{ secrets.LOKI_ID || '' }}
+          loki_password: ${{ secrets.LOKI_PASSWORD || '' }}
diff --git a/tests/bootstrap/main.go b/tests/bootstrap/main.go
new file mode 100644
index 000000000000..289a7846b4cd
--- /dev/null
+++ b/tests/bootstrap/main.go
@@ -0,0 +1,106 @@
+// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
+// See the file LICENSE for licensing terms.
+
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"log"
+	"math"
+	"os"
+	"time"
+
+	"github.com/google/uuid"
+
+	"github.com/ava-labs/avalanchego/config"
+	"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
+	"github.com/ava-labs/avalanchego/utils/logging"
+)
+
+// main parses flags and checks that a single node can bootstrap from the requested network.
+func main() {
+	avalanchegoPath := flag.String("avalanchego-path", "", "The path to an avalanchego binary")
+	networkID := flag.Int64("network-id", 0, "The ID of the network to bootstrap from")
+	stateSyncEnabled := flag.Bool("state-sync-enabled", false, "Whether state syncing should be enabled")
+	maxDuration := flag.Duration("max-duration", time.Hour*72, "The maximum duration the network should run for")
+	flag.Parse()
+
+	if len(*avalanchegoPath) == 0 {
+		log.Fatal("avalanchego-path is required")
+	}
+	if *networkID <= 0 || *networkID > math.MaxUint32 {
+		log.Fatal("network-id is required and must fit in a uint32")
+	}
+	if *maxDuration <= 0 {
+		log.Fatal("max-duration must be greater than zero")
+	}
+
+	if err := checkBootstrap(*avalanchegoPath, uint32(*networkID), *stateSyncEnabled, *maxDuration); err != nil {
+		log.Fatalf("Failed to check bootstrap: %v\n", err)
+	}
+}
+
+// checkBootstrap starts a single-node network for networkID and waits up to maxDuration
+// for the node to report healthy. Once started, the node is stopped before returning.
+func checkBootstrap(avalanchegoPath string, networkID uint32, stateSyncEnabled bool, maxDuration time.Duration) error {
+	flags := tmpnet.DefaultLocalhostFlags()
+	flags.SetDefaults(tmpnet.FlagsMap{
+		config.HealthCheckFreqKey: "30s",
+		// Minimize logging overhead
+		config.LogDisplayLevelKey: logging.Off.String(),
+		config.LogLevelKey:        logging.Info.String(),
+	})
+
+	// Create a new single-node network that will bootstrap from the specified network
+	network := &tmpnet.Network{
+		UUID:         uuid.NewString(),
+		NetworkID:    networkID,
+		Owner:        "bootstrap-test",
+		Nodes:        tmpnet.NewNodesOrPanic(1),
+		DefaultFlags: flags,
+		DefaultRuntimeConfig: tmpnet.NodeRuntimeConfig{
+			// TODO(marun) Rename AvalancheGoPath to AvalanchegoPath
+			AvalancheGoPath: avalanchegoPath,
+		},
+		ChainConfigs: map[string]tmpnet.FlagsMap{
+			"C": {
+				"state-sync-enabled": stateSyncEnabled,
+			},
+		},
+	}
+
+	if err := network.Create(""); err != nil {
+		return fmt.Errorf("failed to create network: %w", err)
+	}
+	node := network.Nodes[0]
+
+	log.Printf("Starting node in path %s (UUID: %s)\n", network.Dir, network.UUID)
+
+	ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
+	defer cancel()
+	if err := network.StartNode(ctx, os.Stdout, node); err != nil {
+		return fmt.Errorf("failed to start node: %w", err)
+	}
+	defer func() {
+		ctx, cancel := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
+		defer cancel()
+		if err := node.Stop(ctx); err != nil {
+			log.Printf("Failed to stop node: %v\n", err)
+		}
+	}()
+
+	log.Printf("Metrics: %s\n", tmpnet.DefaultMetricsLink(network.UUID, time.Now()))
+	log.Print("Waiting for node to indicate bootstrap complete by reporting healthy\n")
+
+	// Avoid checking too often to avoid log spam
+	healthCheckInterval := 1 * time.Minute
+
+	ctx, cancel = context.WithTimeout(context.Background(), maxDuration)
+	defer cancel()
+	if err := tmpnet.WaitForHealthyWithInterval(ctx, node, healthCheckInterval); err != nil {
+		return fmt.Errorf("node failed to become healthy before timeout: %w", err)
+	}
+	return nil
+}
diff --git a/tests/fixture/tmpnet/defaults.go b/tests/fixture/tmpnet/defaults.go
index c5dbfeeebc96..4c19ea08b113 100644
--- a/tests/fixture/tmpnet/defaults.go
+++ b/tests/fixture/tmpnet/defaults.go
@@ -46,20 +46,27 @@ func DefaultTestFlags() FlagsMap {
 	}
 }
 
+// DefaultLocalhostFlags returns flags appropriate for networks that aren't intended to be publicly accessible.
+func DefaultLocalhostFlags() FlagsMap {
+	return FlagsMap{
+		config.PublicIPKey:    "127.0.0.1",
+		config.HTTPHostKey:    "127.0.0.1",
+		config.StakingHostKey: "127.0.0.1",
+	}
+}
+
 // Flags appropriate for tmpnet networks.
 func DefaultTmpnetFlags() FlagsMap {
 	// Supply only non-default configuration to ensure that default values will be used.
flags := FlagsMap{ // Specific to tmpnet deployment - config.PublicIPKey: "127.0.0.1", - config.HTTPHostKey: "127.0.0.1", - config.StakingHostKey: "127.0.0.1", config.LogDisplayLevelKey: logging.Off.String(), // Display logging not needed since nodes run headless config.LogLevelKey: logging.Debug.String(), // Specific to e2e testing config.MinStakeDurationKey: DefaultMinStakeDuration.String(), config.ProposerVMUseCurrentHeightKey: true, } + flags.SetDefaults(DefaultLocalhostFlags()) flags.SetDefaults(DefaultTestFlags()) return flags } diff --git a/tests/fixture/tmpnet/network.go b/tests/fixture/tmpnet/network.go index 63796be267a4..1f710436f236 100644 --- a/tests/fixture/tmpnet/network.go +++ b/tests/fixture/tmpnet/network.go @@ -362,7 +362,7 @@ func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...* return err } // Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed - if _, err := fmt.Fprintf(w, "\nMetrics: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now\n", n.UUID, startTime.UnixMilli()); err != nil { + if _, err := fmt.Fprintf(w, "\nMetrics: %s\n", DefaultMetricsLink(n.UUID, startTime)); err != nil { return err } @@ -906,3 +906,7 @@ func GetReusableNetworkPathForOwner(owner string) (string, error) { } return filepath.Join(networkPath, "latest_"+owner), nil } + +func DefaultMetricsLink(uuid string, startTime time.Time) string { + return fmt.Sprintf("https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now", uuid, startTime.UnixMilli()) +} diff --git a/tests/fixture/tmpnet/node.go b/tests/fixture/tmpnet/node.go index 3a6076af1283..cbd4878433a5 100644 --- a/tests/fixture/tmpnet/node.go +++ 
b/tests/fixture/tmpnet/node.go @@ -236,8 +236,14 @@ func (n *Node) SetNetworkingConfig(bootstrapIDs []string, bootstrapIPs []string) // Default to dynamic port allocation n.Flags[config.StakingPortKey] = 0 } - n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",") - n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",") + if len(bootstrapIDs) == 0 { + // bootstrap-* should not be provided if bootstrapping from mainnet or testnet + delete(n.Flags, config.BootstrapIDsKey) + delete(n.Flags, config.BootstrapIPsKey) + } else { + n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",") + n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",") + } } // Ensures staking and signing keys are generated if not already present and diff --git a/tests/fixture/tmpnet/utils.go b/tests/fixture/tmpnet/utils.go index ba32ed3d4341..e538fd6fe521 100644 --- a/tests/fixture/tmpnet/utils.go +++ b/tests/fixture/tmpnet/utils.go @@ -20,12 +20,17 @@ const ( var ErrNotRunning = errors.New("not running") -// WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed. +// WaitForHealthy with the default interval. func WaitForHealthy(ctx context.Context, node *Node) error { + return WaitForHealthyWithInterval(ctx, node, DefaultNodeTickerInterval) +} + +// WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed. +func WaitForHealthyWithInterval(ctx context.Context, node *Node, interval time.Duration) error { if _, ok := ctx.Deadline(); !ok { return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.NodeID) } - ticker := time.NewTicker(DefaultNodeTickerInterval) + ticker := time.NewTicker(interval) defer ticker.Stop() for {