From 3cb9b379dd2a999aef8044105d2a324b4ce3cd1c Mon Sep 17 00:00:00 2001 From: Florent Poinsard <35779988+frouioui@users.noreply.github.com> Date: Wed, 22 May 2024 02:36:54 -0600 Subject: [PATCH] Remove self-hosted runners in ci_workflow_gen (#15989) Signed-off-by: Florent Poinsard --- GITHUB_SELF_HOSTED_RUNNERS.md | 91 ------------ test/ci_workflow_gen.go | 138 +----------------- .../cluster_endtoend_test_self_hosted.tpl | 91 ------------ test/templates/unit_test_self_hosted.tpl | 90 ------------ 4 files changed, 1 insertion(+), 409 deletions(-) delete mode 100644 GITHUB_SELF_HOSTED_RUNNERS.md delete mode 100644 test/templates/cluster_endtoend_test_self_hosted.tpl delete mode 100644 test/templates/unit_test_self_hosted.tpl diff --git a/GITHUB_SELF_HOSTED_RUNNERS.md b/GITHUB_SELF_HOSTED_RUNNERS.md deleted file mode 100644 index 47d0f223df9..00000000000 --- a/GITHUB_SELF_HOSTED_RUNNERS.md +++ /dev/null @@ -1,91 +0,0 @@ -## Setting up and using GitHub Self hosted runners - -### Adding a new self-hosted runner -Steps to follow to add a new self-hosted runner for GitHub. -You will need access to the Equinix account for Vitess's CI testing and Admin -access to Vitess. - -1. Spawn a new c3.small instance and name it on the Equinix dashboard -2. use ssh to connect to the server -3. Install docker on the server by running the following commands - 1. `curl -fsSL https://get.docker.com -o get-docker.sh` - 2. `sudo sh get-docker.sh` -4. Create a new user with a home directory for the action runner - 1. `useradd -m github-runner` -5. Add the user to the docker group so that it can use docker as well - 1. `sudo usermod -aG docker github-runner` -6. Switch to the newly created user - 1. `su github-runner` -7. Goto the home directory of the user and follow the steps in [Adding self hosted runners to repository](https://docs.github.com/en/actions/hosting-your-own-runners/adding-self-hosted-runners#adding-a-self-hosted-runner-to-a-repository) - 1. `mkdir github-runner- && cd github-runner-` - 2. `curl -o actions-runner-linux-x64-2.280.3.tar.gz -L https://github.com/actions/runner/releases/download/v2.280.3/actions-runner-linux-x64-2.280.3.tar.gz` - 3. `tar xzf ./actions-runner-linux-x64-2.280.3.tar.gz` - 4. `./config.sh --url https://github.com/vitessio/vitess --token --name github-runner-` - 5. With a screen execute `./run.sh` -8. Set up a cron job to remove docker volumes and images every other weekday - 1. `crontab -e` - 2. Within the file add a line `0 5 * * 1,3,5 docker system prune -f --volumes --all` -9. Vtorc, Cluster 14 and some other tests use multiple MySQL instances which are all brought up with asynchronous I/O setup in InnoDB. This sometimes leads to us hitting the Linux asynchronous I/O limit. -To fix this we increase the default limit on the self-hosted runners by - - 1. To set the aio-max-nr value, add the following line to the /etc/sysctl.conf file: - 1. `fs.aio-max-nr = 1048576` - 2. To activate the new setting, run the following command: - 1. `sysctl -p /etc/sysctl.conf` - -### Moving a test to a self-hosted runner -Most of the code for running the tests is generated code by `make generate_ci_workflows` which uses the file `ci_workflow_gen.go` - -To move a unit test from GitHub runners to self-hosted runners, just move the test from `unitTestDatabases` to `unitTestSelfHostedDatabases` in `ci_workflow_gen.go` and call `make generate_ci_workflows` - -To move a cluster test from GitHub runners to self-hosted runners, just move the test from `clusterList` to `clusterSelfHostedList` in `ci_workflow_gen.go` and call `make generate_ci_workflows` - -### Using a self-hosted runner to debug a flaky test -You will need access to the self-hosted runner machine to be able to connect to it via SSH. -1. From the output of the run on GitHub Actions, find the `Machine name` in the `Set up job` step -2. Find that machine on the Equinix dashboard and connect to it via ssh -3. From the output of the `Print Volume Used` step find the volume used -4. From the output of the `Build Docker Image` step find the docker image built for this workflow -5. On the machine run `docker run -d -v :/vt/vtdataroot /bin/bash -c "sleep 600000000000"` -6. On the terminal copy the docker id of the newly created container -7. Now execute `docker exec -it /bin/bash` to go into the container and use the `/vt/vtdataroot` directory to find the output of the run along with the debug files -8. Alternately, execute `docker cp :/vt/vtdataroot ./debugFiles/` to copy the files from the docker container to the servers local file system -9. You can browse the files there or go a step further and download them locally via `scp`. -10. Please remember to cleanup the folders created and remove the docker container via `docker stop `. - -## Single Self-Hosted runners -There is currently one self-hosted runner which only hosts a single runner. This allows us to run tests -that do not use docker on that runner. - -All that is needed to be done is to add `runs-on: single-self-hosted`, remove any code that downloads -dependencies (since they are already present on the self-hosted runner) and add a couple of lines to save -the vtdataroot output if needed. - -[9944](https://github.com/vitessio/vitess/pull/9944/) is an example PR that moves one of the tests to a single-self-hosted runner. - -**NOTE** - It is essential to ensure that all the binaries spawned while running the test be stopped even on failure. -Otherwise, they will keep on running until someone goes ahead and removes them manually. They might interfere -with the future runs as well. - -### Using a single-self-hosted runner to debug a flaky test -The logs will be stored in the `savedRuns` directory and can be copied locally via `scp`. - -A cronjob is already setup to empty the `savedRuns` directory every week so please download the runs -before they are deleted. - -## Running out of disk space in Self-hosted runners - -If the loads on the self-hosted runners increases due to multiple tests being moved to them or some other reason, -they sometimes end up running out of disk space. This causes the runner to stop working all together. - -In order to fix this issue follow the following steps - -1. `ssh` into the self-hosted runner by finding its address from the equinix dashboard. -2. Clear out the disk by running `docker system prune -f --volumes --all`. This is the same command that we run on a cron on the server. -3. Switch to the `github-runner` user - 1. `su github-runner` -4. Resume an existing `screen` - 1. `screen -r` -5. Start the runner again. - 1. `./run.sh` -6. Verify that the runner has started accepting jobs again. Detach the screen and close the `ssh` connection. - - diff --git a/test/ci_workflow_gen.go b/test/ci_workflow_gen.go index 9d594a4f2a7..f1457f1be66 100644 --- a/test/ci_workflow_gen.go +++ b/test/ci_workflow_gen.go @@ -55,11 +55,7 @@ const ( // to be used. clusterTestTemplate = "templates/cluster_endtoend_test%s.tpl" - unitTestSelfHostedTemplate = "templates/unit_test_self_hosted.tpl" - unitTestSelfHostedDatabases = "" - dockerFileTemplate = "templates/dockerfile.tpl" - clusterTestSelfHostedTemplate = "templates/cluster_endtoend_test_self_hosted.tpl" - clusterTestDockerTemplate = "templates/cluster_endtoend_test_docker.tpl" + clusterTestDockerTemplate = "templates/cluster_endtoend_test_docker.tpl" ) var ( @@ -126,7 +122,6 @@ var ( "vttablet_prscomplex", } - clusterSelfHostedList = []string{} clusterDockerList = []string{} clustersRequiringXtraBackup = []string{ "xb_backup", @@ -168,12 +163,6 @@ type clusterTest struct { Cores16 bool } -type selfHostedTest struct { - Name, Platform, Dockerfile, Shard, ImageName, directoryName string - FileName string - MakeTools, InstallXtraBackup, Docker bool -} - // clusterMySQLVersions return list of mysql versions (one or more) that this cluster needs to test against func clusterMySQLVersions(clusterName string) mysqlVersions { switch { @@ -211,16 +200,6 @@ func main() { generateUnitTestWorkflows() generateClusterWorkflows(clusterList, clusterTestTemplate) generateClusterWorkflows(clusterDockerList, clusterTestDockerTemplate) - - // tests that will use self-hosted runners - err := generateSelfHostedUnitTestWorkflows() - if err != nil { - log.Fatal(err) - } - err = generateSelfHostedClusterWorkflows() - if err != nil { - log.Fatal(err) - } } func canonnizeList(list []string) []string { @@ -233,98 +212,6 @@ func canonnizeList(list []string) []string { return output } -func parseList(csvList string) []string { - var list []string - for _, item := range strings.Split(csvList, ",") { - if item != "" { - list = append(list, strings.TrimSpace(item)) - } - } - return list -} - -func generateSelfHostedUnitTestWorkflows() error { - platforms := parseList(unitTestSelfHostedDatabases) - for _, platform := range platforms { - directoryName := fmt.Sprintf("unit_test_%s", platform) - test := &selfHostedTest{ - Name: fmt.Sprintf("Unit Test (%s)", platform), - ImageName: fmt.Sprintf("unit_test_%s", platform), - Platform: platform, - directoryName: directoryName, - Dockerfile: fmt.Sprintf("./.github/docker/%s/Dockerfile", directoryName), - MakeTools: true, - InstallXtraBackup: false, - } - err := setupTestDockerFile(test) - if err != nil { - return err - } - test.FileName = fmt.Sprintf("unit_test_%s.yml", platform) - filePath := fmt.Sprintf("%s/%s", workflowConfigDir, test.FileName) - err = writeFileFromTemplate(unitTestSelfHostedTemplate, filePath, test) - if err != nil { - log.Print(err) - } - } - return nil -} - -func generateSelfHostedClusterWorkflows() error { - clusters := canonnizeList(clusterSelfHostedList) - for _, cluster := range clusters { - for _, mysqlVersion := range clusterMySQLVersions(cluster) { - // check mysqlversion - mysqlVersionIndicator := "" - if mysqlVersion != defaultMySQLVersion && len(clusterMySQLVersions(cluster)) > 1 { - mysqlVersionIndicator = "_" + string(mysqlVersion) - } - - directoryName := fmt.Sprintf("cluster_test_%s%s", cluster, mysqlVersionIndicator) - test := &selfHostedTest{ - Name: fmt.Sprintf("Cluster (%s)(%s)", cluster, mysqlVersion), - ImageName: fmt.Sprintf("cluster_test_%s%s", cluster, mysqlVersionIndicator), - Platform: "mysql80", - directoryName: directoryName, - Dockerfile: fmt.Sprintf("./.github/docker/%s/Dockerfile", directoryName), - Shard: cluster, - MakeTools: false, - InstallXtraBackup: false, - } - makeToolClusters := canonnizeList(clustersRequiringMakeTools) - for _, makeToolCluster := range makeToolClusters { - if makeToolCluster == cluster { - test.MakeTools = true - break - } - } - xtraBackupClusters := canonnizeList(clustersRequiringXtraBackup) - for _, xtraBackupCluster := range xtraBackupClusters { - if xtraBackupCluster == cluster { - test.InstallXtraBackup = true - break - } - } - if mysqlVersion == mysql57 { - test.Platform = string(mysql57) - } - - err := setupTestDockerFile(test) - if err != nil { - return err - } - - test.FileName = fmt.Sprintf("cluster_endtoend_%s%s.yml", cluster, mysqlVersionIndicator) - filePath := fmt.Sprintf("%s/%s", workflowConfigDir, test.FileName) - err = writeFileFromTemplate(clusterTestSelfHostedTemplate, filePath, test) - if err != nil { - log.Print(err) - } - } - } - return nil -} - func generateClusterWorkflows(list []string, tpl string) { clusters := canonnizeList(list) for _, cluster := range clusters { @@ -420,29 +307,6 @@ func evalengineToString(evalengine string) string { return "" } -func setupTestDockerFile(test *selfHostedTest) error { - // remove the directory - relDirectoryName := fmt.Sprintf("../.github/docker/%s", test.directoryName) - err := os.RemoveAll(relDirectoryName) - if err != nil { - return err - } - // create the directory - err = os.MkdirAll(relDirectoryName, 0755) - if err != nil { - return err - } - - // generate the docker file - dockerFilePath := path.Join(relDirectoryName, "Dockerfile") - err = writeFileFromTemplate(dockerFileTemplate, dockerFilePath, test) - if err != nil { - return err - } - - return nil -} - func writeFileFromTemplate(templateFile, filePath string, test any) error { tpl := template.New(path.Base(templateFile)) tpl.Funcs(template.FuncMap{ diff --git a/test/templates/cluster_endtoend_test_self_hosted.tpl b/test/templates/cluster_endtoend_test_self_hosted.tpl deleted file mode 100644 index e28de83004e..00000000000 --- a/test/templates/cluster_endtoend_test_self_hosted.tpl +++ /dev/null @@ -1,91 +0,0 @@ -name: {{.Name}} -on: [push, pull_request] -concurrency: - group: format('{0}-{1}', ${{"{{"}} github.ref {{"}}"}}, '{{.Name}}') - cancel-in-progress: true - -permissions: read-all - -jobs: - build: - name: Run endtoend tests on {{.Name}} - runs-on: self-hosted - - steps: - - name: Skip CI - run: | - if [[ "{{"${{contains( github.event.pull_request.labels.*.name, 'Skip CI')}}"}}" == "true" ]]; then - echo "skipping CI due to the 'Skip CI' label" - exit 1 - fi - - - name: Check if workflow needs to be skipped - id: skip-workflow - run: | - skip='false' - if [[ "{{"${{github.event.pull_request}}"}}" == "" ]] && [[ "{{"${{github.ref}}"}}" != "refs/heads/main" ]] && [[ ! "{{"${{github.ref}}"}}" =~ ^refs/heads/release-[0-9]+\.[0-9]$ ]] && [[ ! "{{"${{github.ref}}"}}" =~ "refs/tags/.*" ]]; then - skip='true' - fi - echo Skip ${skip} - echo "skip-workflow=${skip}" >> $GITHUB_OUTPUT - - - name: Check out code - if: steps.skip-workflow.outputs.skip-workflow == 'false' - uses: actions/checkout@v4 - - - name: Check for changes in relevant files - if: steps.skip-workflow.outputs.skip-workflow == 'false' - uses: dorny/paths-filter@v3.0.1 - id: changes - with: - token: '' - filters: | - end_to_end: - - 'go/**/*.go' - - 'go/vt/sidecardb/**/*.sql' - - 'go/test/endtoend/onlineddl/vrepl_suite/**' - - 'test.go' - - 'Makefile' - - 'build.env' - - 'go.sum' - - 'go.mod' - - 'proto/*.proto' - - 'tools/**' - - 'config/**' - - '.github/docker/**' - - 'bootstrap.sh' - - '.github/workflows/{{.FileName}}' - - - name: Build Docker Image - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - run: docker build -f {{.Dockerfile}} -t {{.ImageName}}:$GITHUB_SHA . - - - name: Run test - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - timeout-minutes: 30 - run: | - # We set the VTDATAROOT to the /tmp folder to reduce the file path of mysql.sock file - # which musn't be more than 107 characters long. - export VTDATAROOT="/tmp/" - - docker run --name "{{.ImageName}}_$GITHUB_SHA" {{.ImageName}}:$GITHUB_SHA /bin/bash -c 'source build.env && go run test.go -keep-data=true -docker=false -print-log -follow -shard {{.Shard}} -- -- --keep-data=true' - - - name: Print Volume Used - if: always() && steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - run: | - docker inspect -f '{{"{{ (index .Mounts 0).Name }}"}}' {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Volume - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - run: | - docker rm -v {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Container - if: always() && steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - run: | - docker rm -f {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Image - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' - run: | - docker image rm {{.ImageName}}:$GITHUB_SHA diff --git a/test/templates/unit_test_self_hosted.tpl b/test/templates/unit_test_self_hosted.tpl deleted file mode 100644 index c6d6790fbfb..00000000000 --- a/test/templates/unit_test_self_hosted.tpl +++ /dev/null @@ -1,90 +0,0 @@ -name: {{.Name}} -on: [push, pull_request] -concurrency: - group: format('{0}-{1}', ${{"{{"}} github.ref {{"}}"}}, '{{.Name}}') - cancel-in-progress: true - -permissions: read-all - -jobs: - test: - runs-on: self-hosted - - steps: - - name: Skip CI - run: | - if [[ "{{"${{contains( github.event.pull_request.labels.*.name, 'Skip CI')}}"}}" == "true" ]]; then - echo "skipping CI due to the 'Skip CI' label" - exit 1 - fi - - - name: Check if workflow needs to be skipped - id: skip-workflow - run: | - skip='false' - if [[ "{{"${{github.event.pull_request}}"}}" == "" ]] && [[ "{{"${{github.ref}}"}}" != "refs/heads/main" ]] && [[ ! "{{"${{github.ref}}"}}" =~ ^refs/heads/release-[0-9]+\.[0-9]$ ]] && [[ ! "{{"${{github.ref}}"}}" =~ "refs/tags/.*" ]]; then - skip='true' - fi - echo Skip ${skip} - echo "skip-workflow=${skip}" >> $GITHUB_OUTPUT - - - name: Check out code - if: steps.skip-workflow.outputs.skip-workflow == 'false' - uses: actions/checkout@v4 - - - name: Check for changes in relevant files - if: steps.skip-workflow.outputs.skip-workflow == 'false' - uses: dorny/paths-filter@v3.0.1 - id: changes - with: - token: '' - filters: | - unit_tests: - - 'go/**' - - 'test.go' - - 'Makefile' - - 'build.env' - - 'go.sum' - - 'go.mod' - - 'proto/*.proto' - - 'tools/**' - - 'config/**' - - 'bootstrap.sh' - - '.github/workflows/{{.FileName}}' - - - name: Build Docker Image - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - run: docker build -f {{.Dockerfile}} -t {{.ImageName}}:$GITHUB_SHA . - - - name: Run test - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - timeout-minutes: 30 - run: | - set -exo pipefail - # We set the VTDATAROOT to the /tmp folder to reduce the file path of mysql.sock file - # which musn't be more than 107 characters long. - export VTDATAROOT="/tmp/" - - docker run --name "{{.ImageName}}_$GITHUB_SHA" {{.ImageName}}:$GITHUB_SHA /bin/bash -c 'NOVTADMINBUILD=1 make unit_test' - - - name: Print Volume Used - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - if: ${{"{{ always() }}"}} - run: | - docker inspect -f '{{"{{ (index .Mounts 0).Name }}"}}' {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Volume - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - run: | - docker rm -v {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Container - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - if: ${{"{{ always() }}"}} - run: | - docker rm -f {{.ImageName}}_$GITHUB_SHA - - - name: Cleanup Docker Image - if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.unit_tests == 'true' - run: | - docker image rm {{.ImageName}}:$GITHUB_SHA