Skip to content

Commit

Permalink
[testing] Add a job to test state sync bootstrap of testnet
Browse files Browse the repository at this point in the history
  • Loading branch information
marun committed Jul 9, 2024
1 parent 2144e36 commit 260ab20
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 8 deletions.
1 change: 1 addition & 0 deletions .github/actionlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ self-hosted-runner:
- custom-arm64-focal
- custom-arm64-jammy
- net-outage-sim
- avalanche-avalanchego
48 changes: 48 additions & 0 deletions .github/actions/check-bootstrap/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Composite action: builds avalanchego from the checked-out repository and runs
# the tests/bootstrap program against the requested network, with metrics and
# log collection handled by the run-monitored-tmpnet-cmd action.
name: 'Check bootstrap for a network and state sync configuration'
description: 'Checks that bootstrap is possible for the given network and state sync configuration'

inputs:
# Forwarded to tests/bootstrap as --network-id.
network_id:
required: true
# Forwarded to tests/bootstrap as --state-sync-enabled.
state_sync_enabled:
required: true
# The four credentials below are passed through unchanged to the
# run-monitored-tmpnet-cmd action.
prometheus_id:
required: true
prometheus_password:
required: true
loki_id:
required: true
loki_password:
required: true

runs:
using: composite
steps:
- name: Setup Go
uses: ./.github/actions/setup-go-for-project

# NOTE(review): -r presumably requests a race-detector build - confirm against scripts/build.sh.
- name: Build AvalancheGo Binary
shell: bash
run: ./scripts/build.sh -r

# Runs the freshly built binary with --version so the tested version appears in the job output.
- name: Check avalanchego version
shell: bash
run: ./build/avalanchego --version

# Despite the step name, the network and the state-sync mode both come from
# the action inputs rather than being fixed to testnet/state-sync.
- name: Run bootstrap for testnet with state-sync
uses: ./.github/actions/run-monitored-tmpnet-cmd
with:
run: go run ./tests/bootstrap --avalanchego-path=./build/avalanchego --network-id=${{ inputs.network_id }} --state-sync-enabled=${{ inputs.state_sync_enabled }}
prometheus_id: ${{ inputs.prometheus_id }}
prometheus_password: ${{ inputs.prometheus_password }}
loki_id: ${{ inputs.loki_id }}
loki_password: ${{ inputs.loki_password }}

# Skip creation of an artifact in favor of log collection to loki

# always() keeps this step running even when an earlier step failed, so the
# disk usage is reported for failed bootstraps too.
- name: Check size of tmpnet path
if: always()
shell: bash
run: |
echo "Checking tmpnet disk usage:"
du -sh ~/.tmpnet
26 changes: 26 additions & 0 deletions .github/workflows/check-bootstrap-testnet-state-sync.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Workflow that runs the check-bootstrap composite action against testnet with
# state sync enabled.
name: 'Check Bootstrap (testnet,state-sync)'

on:
# TODO(marun) Add a schedule
workflow_dispatch:

# TODO(marun) For testing only - remove before merge
pull_request:

jobs:
check_bootstrap_testnet_state_sync:
name: Check Bootstrap (testnet,state-sync)
# Runner label that this commit also adds to .github/actionlint.yml.
runs-on: avalanche-avalanchego
# 4320 minutes = 72 hours, the same as the default --max-duration of tests/bootstrap.
timeout-minutes: 4320 # 3 days
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Check bootstrap (testnet,state-sync)
uses: ./.github/actions/check-bootstrap
with:
network_id: 5 # testnet
state_sync_enabled: true
# Each credential falls back to an empty string when the secret is not set.
prometheus_id: ${{ secrets.PROMETHEUS_ID || '' }}
prometheus_password: ${{ secrets.PROMETHEUS_PASSWORD || '' }}
loki_id: ${{ secrets.LOKI_ID || '' }}
loki_password: ${{ secrets.LOKI_PASSWORD || '' }}
106 changes: 106 additions & 0 deletions tests/bootstrap/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package main

import (
"context"
"flag"
"fmt"
"log"
"os"
"time"

"github.com/google/uuid"

"github.com/ava-labs/avalanchego/config"
"github.com/ava-labs/avalanchego/tests/fixture/tmpnet"
"github.com/ava-labs/avalanchego/utils/logging"
)

// Simple test that starts a single node and waits for it to finish bootstrapping.

// main parses and validates the command-line flags and then runs the
// bootstrap check. Any invalid flag or a failed check terminates the process
// through log.Fatal / log.Fatalf.
//
// Flags:
//   - avalanchego-path: path to the avalanchego binary to run (required)
//   - network-id: ID of the network to bootstrap from (required, 1..2^32-1)
//   - state-sync-enabled: whether state sync is enabled (default false)
//   - max-duration: how long to wait for the node to become healthy
//     (default 72h, must be positive)
func main() {
	// checkBootstrap takes the network ID as a uint32, so the int64 flag value
	// has to be bounded before it is converted; otherwise it would silently wrap.
	const maxNetworkID int64 = 1<<32 - 1

	avalanchegoPath := flag.String("avalanchego-path", "", "The path to an avalanchego binary")
	networkID := flag.Int64("network-id", 0, "The ID of the network to bootstrap from")
	stateSyncEnabled := flag.Bool("state-sync-enabled", false, "Whether state syncing should be enabled")
	maxDuration := flag.Duration("max-duration", time.Hour*72, "The maximum duration the network should run for")

	flag.Parse()

	if len(*avalanchegoPath) == 0 {
		log.Fatal("avalanchego-path is required")
	}
	if *networkID == 0 {
		log.Fatal("network-id is required")
	}
	if *networkID < 0 || *networkID > maxNetworkID {
		log.Fatalf("network-id must be in the range [1, %d], got %d", maxNetworkID, *networkID)
	}
	// A negative duration would produce an already-expired context only after
	// the node has been started, so reject it up front together with zero.
	if *maxDuration <= 0 {
		log.Fatal("max-duration is required and must be positive")
	}

	if err := checkBootstrap(*avalanchegoPath, uint32(*networkID), *stateSyncEnabled, *maxDuration); err != nil {
		log.Fatalf("Failed to check bootstrap: %v\n", err)
	}
}

// checkBootstrap creates a single-node tmpnet network targeting networkID,
// starts that node with the binary at avalanchegoPath and waits for at most
// maxDuration for the node to report healthy. stateSyncEnabled is written to
// the "C" chain config under "state-sync-enabled". Once the node has been
// started it is stopped again before the function returns.
//
// It returns a wrapped error when the network cannot be created, the node
// cannot be started, or the node does not become healthy in time.
func checkBootstrap(avalanchegoPath string, networkID uint32, stateSyncEnabled bool, maxDuration time.Duration) error {
	// Avoid checking too often to avoid log spam
	const healthCheckInterval = time.Minute

	nodeFlags := tmpnet.DefaultLocalhostFlags()
	nodeFlags.SetDefaults(tmpnet.FlagsMap{
		config.HealthCheckFreqKey: "30s",
		// Minimize logging overhead
		config.LogDisplayLevelKey: logging.Off.String(),
		config.LogLevelKey:        logging.Info.String(),
	})

	cChainConfig := tmpnet.FlagsMap{
		"state-sync-enabled": stateSyncEnabled,
	}

	// A new single-node network that will bootstrap from the specified network.
	network := &tmpnet.Network{
		UUID:         uuid.NewString(),
		NetworkID:    networkID,
		Owner:        "bootstrap-test",
		Nodes:        tmpnet.NewNodesOrPanic(1),
		DefaultFlags: nodeFlags,
		DefaultRuntimeConfig: tmpnet.NodeRuntimeConfig{
			// TODO(marun) Rename AvalancheGoPath to AvalanchegoPath
			AvalancheGoPath: avalanchegoPath,
		},
		ChainConfigs: map[string]tmpnet.FlagsMap{
			"C": cChainConfig,
		},
	}

	if err := network.Create(""); err != nil {
		return fmt.Errorf("failed to create network: %w", err)
	}
	bootstrapNode := network.Nodes[0]

	log.Printf("Starting node in path %s (UUID: %s)\n", network.Dir, network.UUID)

	startCtx, cancelStart := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
	defer cancelStart()
	if err := network.StartNode(startCtx, os.Stdout, bootstrapNode); err != nil {
		return fmt.Errorf("failed to start node: %w", err)
	}
	// From here on the node is running, so make sure it is stopped on every exit path.
	defer func() {
		stopCtx, cancelStop := context.WithTimeout(context.Background(), tmpnet.DefaultNetworkTimeout)
		defer cancelStop()
		if stopErr := bootstrapNode.Stop(stopCtx); stopErr != nil {
			log.Printf("Failed to stop node: %v\n", stopErr)
		}
	}()

	log.Printf("Metrics: %s\n", tmpnet.DefaultMetricsLink(network.UUID, time.Now()))
	log.Print("Waiting for node to indicate bootstrap complete by reporting healthy\n")

	healthCtx, cancelHealth := context.WithTimeout(context.Background(), maxDuration)
	defer cancelHealth()
	err := tmpnet.WaitForHealthyWithInterval(healthCtx, bootstrapNode, healthCheckInterval)
	if err != nil {
		return fmt.Errorf("node failed to become healthy before timeout: %w", err)
	}
	return nil
}
13 changes: 10 additions & 3 deletions tests/fixture/tmpnet/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,27 @@ func DefaultTestFlags() FlagsMap {
}
}

// DefaultLocalhostFlags returns flags appropriate for networks that aren't
// intended to be publicly accessible: the public IP, the HTTP host and the
// staking host are all set to the loopback address.
func DefaultLocalhostFlags() FlagsMap {
	const loopbackAddr = "127.0.0.1"

	localhostFlags := FlagsMap{
		config.PublicIPKey:    loopbackAddr,
		config.HTTPHostKey:    loopbackAddr,
		config.StakingHostKey: loopbackAddr,
	}
	return localhostFlags
}

// DefaultTmpnetFlags returns the flags appropriate for tmpnet networks: the
// tmpnet- and e2e-specific values set below, combined with the values from
// DefaultLocalhostFlags and DefaultTestFlags via SetDefaults.
func DefaultTmpnetFlags() FlagsMap {
// Supply only non-default configuration to ensure that default values will be used.
flags := FlagsMap{
// Specific to tmpnet deployment
config.PublicIPKey: "127.0.0.1",
config.HTTPHostKey: "127.0.0.1",
config.StakingHostKey: "127.0.0.1",
config.LogDisplayLevelKey: logging.Off.String(), // Display logging not needed since nodes run headless
config.LogLevelKey: logging.Debug.String(),
// Specific to e2e testing
config.MinStakeDurationKey: DefaultMinStakeDuration.String(),
config.ProposerVMUseCurrentHeightKey: true,
}
// NOTE(review): SetDefaults presumably only fills keys that are not already
// present, so the explicit values above would take precedence - confirm.
flags.SetDefaults(DefaultLocalhostFlags())
flags.SetDefaults(DefaultTestFlags())
return flags
}
Expand Down
6 changes: 5 additions & 1 deletion tests/fixture/tmpnet/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...*
return err
}
// Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
if _, err := fmt.Fprintf(w, "\nMetrics: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now\n", n.UUID, startTime.UnixMilli()); err != nil {
if _, err := fmt.Fprintf(w, "\nMetrics: %s\n", DefaultMetricsLink(n.UUID, startTime)); err != nil {
return err
}

Expand Down Expand Up @@ -906,3 +906,7 @@ func GetReusableNetworkPathForOwner(owner string) (string, error) {
}
return filepath.Join(networkPath, "latest_"+owner), nil
}

// DefaultMetricsLink returns a link to the main Grafana dashboard filtered by
// the given network uuid and excluding ephemeral nodes. The time range of the
// link starts at startTime (encoded as Unix milliseconds) and ends at "now",
// i.e. whenever the link is viewed.
func DefaultMetricsLink(uuid string, startTime time.Time) string {
	const linkFormat = "https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now"

	fromMillis := startTime.UnixMilli()
	return fmt.Sprintf(linkFormat, uuid, fromMillis)
}
10 changes: 8 additions & 2 deletions tests/fixture/tmpnet/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,14 @@ func (n *Node) SetNetworkingConfig(bootstrapIDs []string, bootstrapIPs []string)
// Default to dynamic port allocation
n.Flags[config.StakingPortKey] = 0
}
n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",")
n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",")
if len(bootstrapIDs) == 0 {
// bootstrap-* should not be provided if bootstrapping from mainnet or testnet
delete(n.Flags, config.BootstrapIDsKey)
delete(n.Flags, config.BootstrapIPsKey)
} else {
n.Flags[config.BootstrapIDsKey] = strings.Join(bootstrapIDs, ",")
n.Flags[config.BootstrapIPsKey] = strings.Join(bootstrapIPs, ",")
}
}

// Ensures staking and signing keys are generated if not already present and
Expand Down
9 changes: 7 additions & 2 deletions tests/fixture/tmpnet/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,17 @@ const (

var ErrNotRunning = errors.New("not running")

// WaitForHealthy waits for the given node to become healthy by delegating to
// WaitForHealthyWithInterval with DefaultNodeTickerInterval as the interval.
// It returns whatever error WaitForHealthyWithInterval returns.
func WaitForHealthy(ctx context.Context, node *Node) error {
return WaitForHealthyWithInterval(ctx, node, DefaultNodeTickerInterval)
}

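// WaitForHealthyWithInterval blocks until Node.IsHealthy returns true or an error (including context timeout) is observed.
// The interval sets the period of the ticker used while waiting. The context must have a deadline.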
func WaitForHealthyWithInterval(ctx context.Context, node *Node, interval time.Duration) error {
if _, ok := ctx.Deadline(); !ok {
return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.NodeID)
}
ticker := time.NewTicker(DefaultNodeTickerInterval)
ticker := time.NewTicker(interval)
defer ticker.Stop()

for {
Expand Down

0 comments on commit 260ab20

Please sign in to comment.