From c25377aa145cb682eaaa5399be2ad2e73707549e Mon Sep 17 00:00:00 2001
From: Marian Steinbach
Date: Mon, 9 Dec 2024 12:09:32 +0100
Subject: [PATCH] Rewrite link check script in Python (#2390)

---
 .github/workflows/check-links-in-prod.yaml | 154 ++++++--------------
 1 file changed, 45 insertions(+), 109 deletions(-)

diff --git a/.github/workflows/check-links-in-prod.yaml b/.github/workflows/check-links-in-prod.yaml
index bbfa3c8fbf..3f49998439 100644
--- a/.github/workflows/check-links-in-prod.yaml
+++ b/.github/workflows/check-links-in-prod.yaml
@@ -7,6 +7,11 @@ on:
   schedule:
     - cron: '39 0 20 * *'  # every 20th of the month at 00:39
 
+env:
+  # space-separated list of the top-level sections
+  # to check in https://docs.giantswarm.io/{section}/
+  SECTIONS: 'getting-started overview reference support tutorials'
+
 jobs:
   check-links:
     runs-on: ubuntu-latest
@@ -20,92 +25,34 @@ jobs:
           chmod 777 output
 
       - name: check-links-in-overview-pages
-        run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/overview/ \
-            --check-extern \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/overview.html \
-            || echo "failed1=true" >> "$GITHUB_ENV"
+        shell: python {0}
         continue-on-error: true
-
-      - name: check-links-in-getting-started-pages
         run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/getting-started/ \
-            --check-extern \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/getting-started.html \
-            || echo "failed2=true" >> "$GITHUB_ENV"
-        continue-on-error: true
+          import os
+          import subprocess
 
-      - name: check-links-in-tutorials-pages
-        run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/tutorials/ \
-            --check-extern \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/tutorials.html \
-            || echo "failed3=true" >> "$GITHUB_ENV"
-        continue-on-error: true
+          pwd = os.getcwd()
+          sections = os.getenv('SECTIONS').split()
 
-      - name: check-links-in-reference-pages
-        run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/reference/ \
-            --check-extern \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/reference.html \
-            || echo "failed4=true" >> "$GITHUB_ENV"
-        continue-on-error: true
-
-      - name: check-links-in-support-pages
-        run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/support/ \
-            --check-extern \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/support.html \
-            || echo "failed5=true" >> "$GITHUB_ENV"
-        continue-on-error: true
-
-      - name: check-links-in-changelogs
-        run: |
-          docker run --rm --name linkchecker \
-            --volume ${PWD}/output:/workdir --workdir /workdir \
-            ghcr.io/linkchecker/linkchecker:latest \
-            https://docs.giantswarm.io/changes/ \
-            --threads 1 \
-            --recursion-level 2 \
-            --no-status \
-            --file-output html/utf8/changes.html \
-            --ignore-url="^https://github.com/giantswarm/docs/.*" \
-            --ignore-url="^https://.*example\.com/.*" \
-            --ignore-url="^https://my-org\.github\.com/.*" \
-            --ignore-url="^https://github\.com/giantswarm/giantswarm/.*" \
-            --ignore-url=".*gigantic\.io.*" \
-            || echo "failed6=true" >> "$GITHUB_ENV"
-        continue-on-error: true
+          for i, section in enumerate(sections):
+              # check one section per container run; the report lands in the mounted output dir
+              cmd = ['docker', 'run', '--rm', '--name', 'linkchecker',
+                     '--volume', f'{pwd}/output:/workdir', '--workdir', '/workdir',
+                     'ghcr.io/linkchecker/linkchecker:latest',
+                     f'https://docs.giantswarm.io/{section}/',
+                     '--check-extern',
+                     '--threads', '1',
+                     '--recursion-level', '3',
+                     '--no-status',
+                     '--file-output', f'html/utf8/{section}.html']
+
+              result = subprocess.run(cmd)
+              # non-zero also covers negative codes (process killed by a signal)
+              if result.returncode != 0:
+                  # GITHUB_ENV is the file through which a step exports variables to later steps
+                  env_file = os.getenv('GITHUB_ENV')
+                  with open(env_file, 'a') as f:
+                      f.write(f"failed{i}=true\n")
 
       - name: Store reports as artifacts
         uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.2
@@ -116,31 +63,20 @@ jobs:
           retention-days: 14
 
       - name: Report errors
+        shell: python {0}
         run: |
-          if [[ $failed1 == "true" ]]; then
-            echo "There has been some errors in overview checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed2 == "true" ]]; then
-            echo "There has been some errors in getting started checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed3 == "true" ]]; then
-            echo "There has been some errors in tutorials checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed4 == "true" ]]; then
-            echo "There has been some errors in reference checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed5 == "true" ]]; then
-            echo "There has been some errors in support checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed6 == "true" ]]; then
-            echo "There has been some errors in changelogs checks, please check the step."
-            failed=true
-          fi
-          if [[ $failed == "true" ]]; then
-            exit 1
-          fi
+          import sys
+          import os
+
+          found_error = False
+          sections = os.getenv('SECTIONS').split()
+
+          for i, section in enumerate(sections):
+              # failed{i} is set to "true" by the link check step when section number i had errors
+              failed = os.getenv(f"failed{i}")
+              if failed == "true":
+                  print(f"Found link errors in the {section} section. Please download the report artifact and fix.")
+                  found_error = True
+
+          if found_error:
+              sys.exit(1)