From bd0d2b74afbb347758bb7cc025c8652403fd258d Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 13:26:57 +0100 Subject: [PATCH 1/9] Investigate flaky tests on CI Set up a temporary workflow to run all flaky tests repeatedly on CI for further investigation. --- .github/workflows/test-flaky.yml | 54 ++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/test-flaky.yml diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml new file mode 100644 index 00000000..50e35dff --- /dev/null +++ b/.github/workflows/test-flaky.yml @@ -0,0 +1,54 @@ +name: Test Repeat Flaky + +on: + pull_request: + +jobs: + cache-test-bins: + name: Prepare go test cache + runs-on: ubuntu-latest + strategy: + matrix: + go: + - 1.22 + - 1.23 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Go ${{ matrix.go }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go }} + cache: false + - name: Compile tests + run: go test -c -o f3-tests + - name: Cache compiled tests + uses: actions/cache/save@v4 + with: + path: './f3-tests' + key: f3-tests-flaky-${{ matrix.go }} + test: + name: ${{ matrix.test }} ${{ matrix.go }} + runs-on: ubuntu-latest + needs: cache-test-bins + strategy: + fail-fast: false + matrix: + go: + - 1.22 + - 1.23 + test: + - TestF3LateBootstrap + - TestF3PauseResumeCatchup + - TestF3WithLookback + - TestF3FailRecover + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Restore cached built F3 test binaries + uses: actions/cache/restore@v4 + with: + path: './f3-tests' + key: f3-tests-flaky-${{ matrix.go }} + - name: Repeat ${{ matrix.test }} + run: ./f3-tests -test.run=${{ matrix.test }} -test.count 50 \ No newline at end of file From 3a327fe4221ace1dde792fe1f6f3e84c8e19fddb Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 14:30:58 +0100 Subject: [PATCH 2/9] Repeat them all 100 times --- .github/workflows/test-flaky.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml index 50e35dff..3c7c8652 100644 --- a/.github/workflows/test-flaky.yml +++ b/.github/workflows/test-flaky.yml @@ -51,4 +51,4 @@ jobs: path: './f3-tests' key: f3-tests-flaky-${{ matrix.go }} - name: Repeat ${{ matrix.test }} - run: ./f3-tests -test.run=${{ matrix.test }} -test.count 50 \ No newline at end of file + run: ./f3-tests -test.run=${{ matrix.test }} -test.count 100 \ No newline at end of file From 802234aa6427882506ac0928a37699abdac09bb0 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 14:48:55 +0100 Subject: [PATCH 3/9] Re-run all directly using go test --- .github/workflows/test-flaky.yml | 65 ++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml index 3c7c8652..9b518fd5 100644 --- a/.github/workflows/test-flaky.yml +++ b/.github/workflows/test-flaky.yml @@ -4,33 +4,33 @@ on: pull_request: jobs: - cache-test-bins: - name: Prepare go test cache - runs-on: ubuntu-latest - strategy: - matrix: - go: - - 1.22 - - 1.23 - steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Go ${{ matrix.go }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go }} - cache: false - - name: Compile tests - run: go test -c -o f3-tests - - name: Cache compiled tests - uses: actions/cache/save@v4 - with: - path: './f3-tests' - key: f3-tests-flaky-${{ matrix.go }} + # cache-test-bins: + # name: Prepare go test cache + # runs-on: ubuntu-latest + # strategy: + # matrix: + # go: + # - 1.22 + # - 1.23 + # steps: + # - name: Checkout + # uses: actions/checkout@v4 + # - name: Setup Go ${{ matrix.go }} + # uses: actions/setup-go@v5 + # with: + # go-version: ${{ matrix.go }} + # cache: false + # - name: Compile tests + # run: go test -c -o f3-tests + # - name: Cache compiled tests + # uses: actions/cache/save@v4 + # with: + # path: './f3-tests' + # key: f3-tests-flaky-${{ matrix.go }} test: name: ${{ matrix.test }} ${{ matrix.go }} runs-on: ubuntu-latest - needs: cache-test-bins + # needs: cache-test-bins strategy: fail-fast: false matrix: @@ -45,10 +45,17 @@ jobs: steps: - name: Checkout uses: actions/checkout@v4 - - name: Restore cached built F3 test binaries - uses: actions/cache/restore@v4 + # - name: Restore cached built F3 test binaries + # uses: actions/cache/restore@v4 + # with: + # path: './f3-tests' + # key: f3-tests-flaky-${{ matrix.go }} + # - name: Repeat ${{ matrix.test }} + # run: ./f3-tests -test.run=${{ matrix.test }} -test.count 100 + - name: Setup Go ${{ matrix.go }} + uses: actions/setup-go@v5 with: - path: './f3-tests' - key: f3-tests-flaky-${{ matrix.go }} + go-version: ${{ matrix.go }} + cache: false - name: Repeat ${{ matrix.test }} - run: ./f3-tests -test.run=${{ matrix.test }} -test.count 100 \ No newline at end of file + run: go test . -run=${{ matrix.test }} -count 50 \ No newline at end of file From 52ca4f4c30fa90adb9de0a1b27b611055949085a Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 14:56:42 +0100 Subject: [PATCH 4/9] Re-run all with race detector --- .github/workflows/test-flaky.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml index 9b518fd5..38fa9cf0 100644 --- a/.github/workflows/test-flaky.yml +++ b/.github/workflows/test-flaky.yml @@ -58,4 +58,4 @@ jobs: go-version: ${{ matrix.go }} cache: false - name: Repeat ${{ matrix.test }} - run: go test . -run=${{ matrix.test }} -count 50 \ No newline at end of file + run: go test -race . -run=${{ matrix.test }} -v -count 50 \ No newline at end of file From a0027032a1a6215ddf13ff32c4f90e762d269b41 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 15:04:16 +0100 Subject: [PATCH 5/9] All together now --- .github/workflows/test-flaky.yml | 12 ++++++------ f3_test.go | 3 +++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml index 38fa9cf0..8945a9c4 100644 --- a/.github/workflows/test-flaky.yml +++ b/.github/workflows/test-flaky.yml @@ -37,11 +37,11 @@ jobs: go: - 1.22 - 1.23 - test: - - TestF3LateBootstrap - - TestF3PauseResumeCatchup - - TestF3WithLookback - - TestF3FailRecover +# test: +# - TestF3LateBootstrap +# - TestF3PauseResumeCatchup +# - TestF3WithLookback +# - TestF3FailRecover steps: - name: Checkout uses: actions/checkout@v4 @@ -58,4 +58,4 @@ jobs: go-version: ${{ matrix.go }} cache: false - name: Repeat ${{ matrix.test }} - run: go test -race . -run=${{ matrix.test }} -v -count 50 \ No newline at end of file + run: go test -v -count 50 . --tags=theflakybunch \ No newline at end of file diff --git a/f3_test.go b/f3_test.go index 726933f7..45486fac 100644 --- a/f3_test.go +++ b/f3_test.go @@ -1,3 +1,6 @@ +//go:build theflakybunch +// +build theflakybunch + package f3_test import ( From 90499ac92f2d5a752f4c34ccdfa795b27405bbc3 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Mon, 7 Oct 2024 15:24:45 +0100 Subject: [PATCH 6/9] Do not run flaky tests in parallel --- f3_test.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/f3_test.go b/f3_test.go index 45486fac..6a9329f5 100644 --- a/f3_test.go +++ b/f3_test.go @@ -46,7 +46,6 @@ func TestF3Simple(t *testing.T) { } func TestF3WithLookback(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).withManifest(func(m *manifest.Manifest) { m.EC.HeadLookback = 20 }).start() @@ -92,7 +91,6 @@ func TestF3WithLookback(t *testing.T) { } func TestF3PauseResumeCatchup(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(3).start() env.waitForInstanceNumber(1, 30*time.Second, true) @@ -131,7 +129,6 @@ func TestF3PauseResumeCatchup(t *testing.T) { } func TestF3FailRecover(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2) // Make it possible to fail a single write for node 0. @@ -245,7 +242,6 @@ func TestF3DynamicManifest_WithPauseAndRebootstrap(t *testing.T) { } func TestF3LateBootstrap(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).start() // Wait till we're "caught up". @@ -307,12 +303,14 @@ type testNode struct { } func (n *testNode) currentGpbftInstance() uint64 { - c, err := n.f3.GetLatestCert(n.e.testCtx) - require.NoError(n.e.t, err) - if c == nil { - return n.e.manifest.InitialInstance - } - return c.GPBFTInstance + 1 + instance, _, _ := n.f3.Progress() + return instance + //c, err := n.f3.GetLatestCert(n.e.testCtx) + //require.NoError(n.e.t, err) + //if c == nil { + // return n.e.manifest.InitialInstance + //} + //return c.GPBFTInstance + 1 } func (n *testNode) init() *f3.F3 { From bd76081dc4d0e0e4a19b550b7568f18a5570ee55 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 7 Oct 2024 10:00:29 -0700 Subject: [PATCH 7/9] 30m test run --- .github/workflows/test-flaky.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-flaky.yml b/.github/workflows/test-flaky.yml index 8945a9c4..08f50f0e 100644 --- a/.github/workflows/test-flaky.yml +++ b/.github/workflows/test-flaky.yml @@ -58,4 +58,4 @@ jobs: go-version: ${{ matrix.go }} cache: false - name: Repeat ${{ matrix.test }} - run: go test -v -count 50 . --tags=theflakybunch \ No newline at end of file + run: go test -v -timeout 30m -count 50 . --tags=theflakybunch From 17dd66e88f2740047da4ccadf4139540d2bdfe01 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 7 Oct 2024 10:05:02 -0700 Subject: [PATCH 8/9] revert current gpbft instance --- f3_test.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/f3_test.go b/f3_test.go index 6a9329f5..e77cc011 100644 --- a/f3_test.go +++ b/f3_test.go @@ -303,14 +303,12 @@ type testNode struct { } func (n *testNode) currentGpbftInstance() uint64 { - instance, _, _ := n.f3.Progress() - return instance - //c, err := n.f3.GetLatestCert(n.e.testCtx) - //require.NoError(n.e.t, err) - //if c == nil { - // return n.e.manifest.InitialInstance - //} - //return c.GPBFTInstance + 1 + c, err := n.f3.GetLatestCert(n.e.testCtx) + require.NoError(n.e.t, err) + if c == nil { + return n.e.manifest.InitialInstance + } + return c.GPBFTInstance + 1 } func (n *testNode) init() *f3.F3 { From 486b9a81abee9b2ebfa19f7520dde49aceeaa9dc Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Tue, 8 Oct 2024 10:32:59 +0100 Subject: [PATCH 9/9] Remove remaining calls to t.Parallel --- f3_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/f3_test.go b/f3_test.go index e77cc011..752316b5 100644 --- a/f3_test.go +++ b/f3_test.go @@ -40,7 +40,6 @@ func init() { var manifestSenderTimeout = 10 * time.Second func TestF3Simple(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).start() env.waitForInstanceNumber(5, 10*time.Second, false) } @@ -159,7 +158,6 @@ func TestF3FailRecover(t *testing.T) { } func TestF3DynamicManifest_WithoutChanges(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).withDynamicManifest() env.start() @@ -172,7 +170,6 @@ func TestF3DynamicManifest_WithoutChanges(t *testing.T) { } func TestF3DynamicManifest_WithRebootstrap(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).withDynamicManifest().start() prev := env.nodes[0].f3.Manifest() @@ -214,7 +211,6 @@ func TestF3DynamicManifest_WithRebootstrap(t *testing.T) { } func TestF3DynamicManifest_WithPauseAndRebootstrap(t *testing.T) { - t.Parallel() env := newTestEnvironment(t).withNodes(2).withDynamicManifest().start() env.waitForInstanceNumber(10, 30*time.Second, true)