diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 4dfcaf1ae..0bcc478e9 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -9,7 +9,54 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
 
+env:
+  OTEL_SERVICE_NAME: 'pr-rmm'
+  # TODO: this should be set as an org-wide variable
+  OTEL_EXPORTER_OTLP_ENDPOINT: "https://tempo.gha-runners.nvidia.com:4318"
+  OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
+  OTEL_RESOURCE_ATTRIBUTES: "git.repository=${{ github.repository }},git.ref=${{ github.ref }},git.sha=${{ github.sha }},git.job_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+  # Set these to point the shared-actions clones at a branch on the rapidsai/shared-actions repo
+  SHARED_ACTIONS_REPO: "rapidsai/shared-actions"
+  SHARED_ACTIONS_REF: "main"
+
 jobs:
+  reexports:
+    # Re-export workflow-level env vars as job outputs so that shared actions and workflows can read them.
+    # You can't use env.SOMETHING in a "with:" section when calling a shared workflow.
+    runs-on: ubuntu-latest
+    outputs:
+      endpoint: "${{steps.reexport.outputs.endpoint}}"
+      service_name: "${{steps.reexport.outputs.service_name}}"
+      otel_resource_attributes: "${{steps.reexport.outputs.otel_resource_attributes}}"
+      shared_actions_repo: "${{steps.reexport.outputs.shared_actions_repo}}"
+      shared_actions_ref: "${{steps.reexport.outputs.shared_actions_ref}}"
+    steps:
+      - id: reexport
+        # Each echo appends one name=value pair to GITHUB_OUTPUT; these back the job outputs declared above.
+        run: |
+          echo "endpoint=${OTEL_EXPORTER_OTLP_ENDPOINT}" >> "${GITHUB_OUTPUT}"
+          echo "service_name=${OTEL_SERVICE_NAME}" >> "${GITHUB_OUTPUT}"
+          echo "otel_resource_attributes=${OTEL_RESOURCE_ATTRIBUTES}" >> "${GITHUB_OUTPUT}"
+          echo "shared_actions_ref=${SHARED_ACTIONS_REF}" >> "${GITHUB_OUTPUT}"
+          echo "shared_actions_repo=${SHARED_ACTIONS_REPO}" >> "${GITHUB_OUTPUT}"
+  top-level-telemetry-traceparent:
+    runs-on: ubuntu-latest
+    needs:
+      - reexports
+    outputs:
+      traceparent: "${{steps.traceparent.outputs.traceparent}}"
+    env:
+      OTEL_SERVICE_NAME: ${{needs.reexports.outputs.service_name}}
+    steps:
+      - name: Clone shared-actions repo
+        uses: actions/checkout@v4
+        with:
+          repository: ${{env.SHARED_ACTIONS_REPO}}
+          ref: ${{env.SHARED_ACTIONS_REF}}
+          path: ./shared-actions
+      - id: traceparent
+        uses: ./shared-actions/telemetry-traceparent
+
   pr-builder:
     needs:
       - changed-files
@@ -19,6 +66,8 @@ jobs:
       - conda-python-build
       - conda-python-tests
       - docs-build
+      - reexports
+      - top-level-telemetry-traceparent
       - wheel-build-cpp
       - wheel-build-python
       - wheel-tests
@@ -30,8 +79,15 @@ jobs:
       needs: ${{ toJSON(needs) }}
   changed-files:
     secrets: inherit
+    needs: [reexports, top-level-telemetry-traceparent]
     uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12
     with:
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
       files_yaml: |
         test_cpp:
           - '**'
@@ -50,37 +106,78 @@ jobs:
           - '!img/**'
   checks:
     secrets: inherit
+    needs: [reexports, top-level-telemetry-traceparent]
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12
     with:
       enable_check_generated_files: false
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+      ignored_pr_jobs: "final-telemetry-update"
+
   conda-cpp-build:
-    needs: checks
+    needs:
+      - checks
+      - reexports
+      - top-level-telemetry-traceparent
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.12
     with:
       build_type: pull-request
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   conda-cpp-tests:
-    needs: [conda-cpp-build, changed-files]
+    needs: [conda-cpp-build, changed-files, reexports, top-level-telemetry-traceparent]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
     with:
       build_type: pull-request
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   conda-python-build:
-    needs: conda-cpp-build
+    needs:
+      - conda-cpp-build
+      - reexports
+      - top-level-telemetry-traceparent
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.12
     with:
       build_type: pull-request
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   conda-python-tests:
-    needs: [conda-python-build, changed-files]
+    needs: [conda-python-build, changed-files, reexports, top-level-telemetry-traceparent]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   docs-build:
-    needs: conda-python-build
+    needs:
+      - conda-python-build
+      - reexports
+      - top-level-telemetry-traceparent
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
     with:
@@ -89,36 +186,105 @@ jobs:
       arch: "amd64"
       container_image: "rapidsai/ci-conda:latest"
       run_script: "ci/build_docs.sh"
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   wheel-build-cpp:
-    needs: checks
+    needs:
+      - checks
+      - reexports
+      - top-level-telemetry-traceparent
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12
     with:
       matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber)))
       build_type: pull-request
       script: ci/build_wheel_cpp.sh
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   wheel-build-python:
-    needs: wheel-build-cpp
+    needs:
+      - wheel-build-cpp
+      - reexports
+      - top-level-telemetry-traceparent
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.12
     with:
       build_type: pull-request
       script: ci/build_wheel_python.sh
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   wheel-tests:
-    needs: [wheel-build-python, changed-files]
+    needs: [wheel-build-python, changed-files, reexports, top-level-telemetry-traceparent]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
     if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
     with:
       build_type: pull-request
       script: ci/test_wheel.sh
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
+
   devcontainer:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.12
+    needs:
+      - reexports
+      - top-level-telemetry-traceparent
     with:
       arch: '["amd64"]'
       cuda: '["12.5"]'
+      default_endpoint: "${{needs.reexports.outputs.endpoint}}"
+      otel_resource_attributes: "${{needs.reexports.outputs.otel_resource_attributes}}"
+      traceparent: ${{ needs.top-level-telemetry-traceparent.outputs.traceparent }}
+      shared_actions_repo: ${{needs.reexports.outputs.shared_actions_repo}}
+      shared_actions_ref: ${{needs.reexports.outputs.shared_actions_ref}}
       build_command: |
         sccache -z;
         build-all -DBUILD_BENCHMARKS=ON --verbose;
         sccache -s;
+  final-telemetry-update:
+    # Sends the telemetry summary and the closing root span once pr-builder has finished.
+    # A status-check function is required here: without one, this job is skipped whenever
+    # any job in "needs" fails, so failed runs would never be summarized or finalized.
+    if: ${{ !cancelled() }}
+    runs-on: ubuntu-latest
+    needs:
+      - reexports
+      - top-level-telemetry-traceparent
+      - pr-builder
+    steps:
+      - name: Clone shared-actions repo
+        uses: actions/checkout@v4
+        with:
+          repository: ${{needs.reexports.outputs.shared_actions_repo}}
+          ref: ${{needs.reexports.outputs.shared_actions_ref}}
+          path: ./shared-actions
+      - name: Send summary
+        uses: ./shared-actions/telemetry-summarize
+        with:
+          traceparent: ${{needs.top-level-telemetry-traceparent.outputs.traceparent}}
+          ca_cert: "${{secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE}}"
+          client_cert: "${{secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}}"
+          client_key: "${{secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY}}"
+      - name: Send root span with final time
+        uses: ./shared-actions/telemetry-finalize-root-span
+        with:
+          traceparent: ${{needs.top-level-telemetry-traceparent.outputs.traceparent}}
+          ca_cert: "${{secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE}}"
+          client_cert: "${{secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}}"
+          client_key: "${{secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY}}"