From fab06d6875db2e14686e24cc91f45492279b281f Mon Sep 17 00:00:00 2001 From: Josh Wilson Date: Tue, 12 Mar 2024 15:05:44 +0900 Subject: [PATCH] ci: run sn_client with network Adds these tests to PR runs, sets the proper location for them for nightly --- .github/workflows/merge.yml | 8 + .github/workflows/nightly.yml | 265 ++++++++++++++++++---------------- 2 files changed, 146 insertions(+), 127 deletions(-) diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index cd22653d54..65a3964276 100644 --- a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -97,6 +97,7 @@ jobs: timeout-minutes: 25 run: cargo test --release --package sn_cli -- --skip test_acc_packet_ + # We do not run client `--tests` here as they can require a network - name: Run client tests timeout-minutes: 25 run: | @@ -366,6 +367,13 @@ jobs: echo "SAFE_PEERS has been set to $SAFE_PEERS" fi + # only these unit tests require a network, the rest are run above + - name: Run sn_client --tests + run: cargo test --package sn_client --release --tests + env: + SN_LOG: "all" + timeout-minutes: 15 + - name: Create and fund a wallet to pay for files storage run: | cargo run --bin faucet --release -- --log-output-dest=data-dir send 1000000 $(cargo run --bin safe --release -- --log-output-dest=data-dir wallet address | tail -n 1) | tail -n 1 > transfer_hex diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 927e74f42c..053dac282f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -2,7 +2,7 @@ name: Nightly -- Full Network Tests on: schedule: - - cron: '0 0 * * *' + - cron: "0 0 * * *" workflow_dispatch: env: @@ -24,7 +24,7 @@ jobs: - uses: Swatinem/rust-cache@v2 continue-on-error: true - + - name: Build binaries run: cargo build --release --bin safenode --bin safe --bin faucet timeout-minutes: 30 @@ -43,6 +43,13 @@ jobs: shell: bash run: echo "Peer is $SAFE_PEERS" + # only these unit tests require a network, the rest are 
run above in unit test section + - name: Run sn_client --tests + run: cargo test --package sn_client --release --tests + env: + SN_LOG: "all" + timeout-minutes: 15 + - name: Create and fund a wallet to pay for files storage run: | cargo run --bin faucet --release -- --log-output-dest=data-dir send 1000000 $(cargo run --bin safe --release -- --log-output-dest=data-dir wallet address | tail -n 1) | tail -n 1 > transfer_hex @@ -121,7 +128,7 @@ jobs: continue-on-error: true - name: Build unit tests before running - run: cargo test --release --lib --bins --no-run + run: cargo test --release --lib --bins --no-run timeout-minutes: 30 - name: Run CLI tests @@ -130,9 +137,13 @@ jobs: - name: Run client tests timeout-minutes: 25 + # we do not run the `--tests` here as they are run in the e2e job + # as they require a network run: | - cargo test --release --package sn_client --lib cargo test --release --package sn_client --doc + cargo test --release --package sn_client --lib + cargo test --release --package sn_client --bins + cargo test --release --package sn_client --examples - name: Run network tests timeout-minutes: 25 @@ -162,7 +173,7 @@ jobs: SLACK_TITLE: "Nightly Unit Test Run Failed" gossipsub: - if: "!startsWith(github.event.head_commit.message, 'chore(release):')" + if: "!startsWith(github.event.head_commit.message, 'chore(release):')" name: Gossipsub E2E tests runs-on: ${{ matrix.os }} strategy: @@ -197,7 +208,7 @@ jobs: platform: ${{ matrix.os }} build: true - - name: Gossipsub - nodes to subscribe to topics, and publish messages + - name: Gossipsub - nodes to subscribe to topics, and publish messages run: cargo test --release -p sn_node --features local-discovery --test msgs_over_gossipsub -- --nocapture env: CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.'
}} @@ -256,7 +267,7 @@ jobs: timeout-minutes: 10 - name: execute the storage payment tests - run: cargo test --release -p sn_node --features="local-discovery" --test storage_payments -- --nocapture --test-threads=1 + run: cargo test --release -p sn_node --features="local-discovery" --test storage_payments -- --nocapture --test-threads=1 env: CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} SN_LOG: "all" @@ -265,7 +276,7 @@ jobs: - name: Small wait to allow reward receipt run: sleep 30 timeout-minutes: 1 - + - name: Stop the local network and upload logs if: always() uses: maidsafe/sn-local-testnet-action@main @@ -330,7 +341,7 @@ jobs: - name: Small wait to allow reward receipt run: sleep 30 timeout-minutes: 1 - + - name: Stop the local network and upload logs if: always() uses: maidsafe/sn-local-testnet-action@main @@ -348,7 +359,7 @@ jobs: SLACK_TITLE: "Nightly Royalty Reward Test Run Failed" token_distribution_test: - if: "!startsWith(github.event.head_commit.message, 'chore(release):')" + if: "!startsWith(github.event.head_commit.message, 'chore(release):')" name: token distribution test runs-on: ${{ matrix.os }} strategy: @@ -437,7 +448,7 @@ jobs: run: cargo build --release --features local-discovery --bin safenode --bin faucet timeout-minutes: 30 - - name: Build churn tests + - name: Build churn tests run: cargo test --release -p sn_node --features=local-discovery --test data_with_churn --no-run env: # only set the target dir for windows to bypass the linker issue. @@ -463,14 +474,14 @@ jobs: SN_LOG: "all" CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' 
}} timeout-minutes: 90 - + - name: Verify restart of nodes using rg shell: bash timeout-minutes: 1 # get the counts, then the specific line, and then the digit count only # then check we have an expected level of restarts # TODO: make this use an env var, or relate to testnet size - run : | + run: | restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \ rg "(\d+) matches" | rg "\d+" -o) echo "Restart $restart_count nodes" @@ -483,7 +494,7 @@ jobs: fi node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) echo "Node dir count is $node_count" - + # TODO: reenable this once the testnet dir creation is tidied up to avoid a large count here # if [ $restart_count -lt $node_count ]; then # echo "Restart count of: $restart_count is less than the node count of: $node_count" @@ -496,7 +507,7 @@ jobs: # get the counts, then the specific line, and then the digit count only # then check we have an expected level of replication # TODO: make this use an env var, or relate to testnet size - run : | + run: | fetching_attempt_count=$(rg "FetchingKeysForReplication" "${{ matrix.node_data_path }}" -c --stats | \ rg "(\d+) matches" | rg "\d+" -o) echo "Carried out $fetching_attempt_count fetching attempts" @@ -513,7 +524,7 @@ jobs: action: stop log_file_prefix: safe_test_logs_churn platform: ${{ matrix.os }} - + - name: post notification to slack on failure if: ${{ failure() }} uses: bryannice/gitactions-slack-notification@2.0.0 @@ -535,114 +546,114 @@ jobs: fi verify_data_location_routing_table: - name: Verify data location and Routing Table - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-latest - node_data_path: /home/runner/.local/share/safe/node - safe_path: /home/runner/.local/share/safe - - os: windows-latest - node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node - safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe - - os: macos-latest - node_data_path: /Users/runner/Library/Application 
Support/safe/node - safe_path: /Users/runner/Library/Application Support/safe - steps: - - uses: actions/checkout@v4 - - - name: Install Rust - uses: dtolnay/rust-toolchain@stable - - - uses: Swatinem/rust-cache@v2 - continue-on-error: true - - - name: Build binaries - run: cargo build --release --features local-discovery --bin safenode --bin faucet - timeout-minutes: 30 - - - name: Build data location and routing table tests - run: cargo test --release -p sn_node --features=local-discovery --test verify_data_location --test verify_routing_table --no-run - env: - # only set the target dir for windows to bypass the linker issue. - # happens if we build the node manager via testnet action - CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} - timeout-minutes: 30 - - - name: Start a local network - uses: maidsafe/sn-local-testnet-action@main - with: - action: start - interval: 2000 - node-path: target/release/safenode - faucet-path: target/release/faucet - platform: ${{ matrix.os }} - build: true - - - name: Verify the Routing table of the nodes - run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture - env: - CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} - timeout-minutes: 5 - - - name: Verify the location of the data on the network - run: cargo test --release -p sn_node --features="local-discovery" --test verify_data_location -- --nocapture - env: - SN_LOG: "all" - CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} - timeout-minutes: 90 - - - name: Verify the routing tables of the nodes - run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture - env: - CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' 
}} - timeout-minutes: 5 - - - name: Verify restart of nodes using rg - shell: bash - timeout-minutes: 1 - # get the counts, then the specific line, and then the digit count only - # then check we have an expected level of restarts - # TODO: make this use an env var, or relate to testnet size - run : | - restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \ - rg "(\d+) matches" | rg "\d+" -o) - echo "Restart $restart_count nodes" - peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \ - rg "(\d+) matches" | rg "\d+" -o) - echo "PeerRemovedFromRoutingTable $peer_removed times" - if [ $peer_removed -lt $restart_count ]; then - echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count" - exit 1 - fi - node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) - echo "Node dir count is $node_count" - - - name: Stop the local network and upload logs - if: always() - uses: maidsafe/sn-local-testnet-action@main - with: - action: stop - log_file_prefix: safe_test_logs_data_location - platform: ${{ matrix.os }} - - - name: post notification to slack on failure - if: ${{ failure() }} - uses: bryannice/gitactions-slack-notification@2.0.0 - env: - SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }} - SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ github.run_id }}" - SLACK_TITLE: "Nightly Data Location Test Run Failed" - - # Only error out after uploading the logs - - name: Don't log raw data - if: matrix.os != 'windows-latest' # causes error - shell: bash - timeout-minutes: 10 - run: | - if ! 
rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }' - then - echo "We are logging an extremely large data" - exit 1 - fi + name: Verify data location and Routing Table + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + node_data_path: /home/runner/.local/share/safe/node + safe_path: /home/runner/.local/share/safe + - os: windows-latest + node_data_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe\\node + safe_path: C:\\Users\\runneradmin\\AppData\\Roaming\\safe + - os: macos-latest + node_data_path: /Users/runner/Library/Application Support/safe/node + safe_path: /Users/runner/Library/Application Support/safe + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@stable + + - uses: Swatinem/rust-cache@v2 + continue-on-error: true + + - name: Build binaries + run: cargo build --release --features local-discovery --bin safenode --bin faucet + timeout-minutes: 30 + + - name: Build data location and routing table tests + run: cargo test --release -p sn_node --features=local-discovery --test verify_data_location --test verify_routing_table --no-run + env: + # only set the target dir for windows to bypass the linker issue. + # happens if we build the node manager via testnet action + CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} + timeout-minutes: 30 + + - name: Start a local network + uses: maidsafe/sn-local-testnet-action@main + with: + action: start + interval: 2000 + node-path: target/release/safenode + faucet-path: target/release/faucet + platform: ${{ matrix.os }} + build: true + + - name: Verify the Routing table of the nodes + run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture + env: + CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' 
}} + timeout-minutes: 5 + + - name: Verify the location of the data on the network + run: cargo test --release -p sn_node --features="local-discovery" --test verify_data_location -- --nocapture + env: + SN_LOG: "all" + CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} + timeout-minutes: 90 + + - name: Verify the routing tables of the nodes + run: cargo test --release -p sn_node --features="local-discovery" --test verify_routing_table -- --nocapture + env: + CARGO_TARGET_DIR: ${{ matrix.os == 'windows-latest' && './test-target' || '.' }} + timeout-minutes: 5 + + - name: Verify restart of nodes using rg + shell: bash + timeout-minutes: 1 + # get the counts, then the specific line, and then the digit count only + # then check we have an expected level of restarts + # TODO: make this use an env var, or relate to testnet size + run: | + restart_count=$(rg "Node is restarting in" "${{ matrix.node_data_path }}" -c --stats | \ + rg "(\d+) matches" | rg "\d+" -o) + echo "Restart $restart_count nodes" + peer_removed=$(rg "PeerRemovedFromRoutingTable" "${{ matrix.node_data_path }}" -c --stats | \ + rg "(\d+) matches" | rg "\d+" -o) + echo "PeerRemovedFromRoutingTable $peer_removed times" + if [ $peer_removed -lt $restart_count ]; then + echo "PeerRemovedFromRoutingTable times of: $peer_removed is less than the restart count of: $restart_count" + exit 1 + fi + node_count=$(ls "${{ matrix.node_data_path }}" | wc -l) + echo "Node dir count is $node_count" + + - name: Stop the local network and upload logs + if: always() + uses: maidsafe/sn-local-testnet-action@main + with: + action: stop + log_file_prefix: safe_test_logs_data_location + platform: ${{ matrix.os }} + + - name: post notification to slack on failure + if: ${{ failure() }} + uses: bryannice/gitactions-slack-notification@2.0.0 + env: + SLACK_INCOMING_WEBHOOK: ${{ secrets.SLACK_GH_ACTIONS_WEBHOOK_URL }} + SLACK_MESSAGE: "Please check the logs for the run at ${{ env.WORKFLOW_URL }}/${{ 
github.run_id }}" + SLACK_TITLE: "Nightly Data Location Test Run Failed" + + # Only error out after uploading the logs + - name: Don't log raw data + if: matrix.os != 'windows-latest' # causes error + shell: bash + timeout-minutes: 10 + run: | + if ! rg '^' "${{ matrix.safe_path }}"/*/*/logs | awk 'length($0) > 15000 { print; exit 1 }' + then + echo "We are logging an extremely large data" + exit 1 + fi