add telemetry #6286
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: pr

# Triggered by pushes to branches whose name matches pull-request/<digits>.
on:
  push:
    branches: ["pull-request/[0-9]+"]

# Runs are grouped by workflow name and ref; a new run in the same group
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
env:
  # Service name attached to the telemetry emitted by this workflow. It names
  # this repository's PR pipeline (every job below builds or tests cugraph).
  # TODO: put this in a shared org-wide secret?
  OTEL_SERVICE_NAME: "pr-cugraph"
  # TODO: this should be set as an org-wide variable
  OTEL_EXPORTER_OTLP_ENDPOINT: "https://tempo.gha-runners.nvidia.com:4318"
  # Paths that the certificate secrets get written to. These files do not
  # exist unless a step explicitly writes them.
  # The purpose of setting these environment variables is to tell the
  # OpenTelemetry tools where to find the files. We abuse that a bit by also
  # using the same variables as the write destination for the certificates.
  OTEL_EXPORTER_OTLP_CERTIFICATE: "/tmp/certs/ca.crt"
  OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE: "/tmp/certs/client.crt"
  OTEL_EXPORTER_OTLP_CLIENT_KEY: "/tmp/certs/client.key"
  OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
  OTEL_EXPORTER_OTLP_HEADERS: ${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}
jobs: | ||
telemetry-setup:
  # Records the workflow start time, re-exports the telemetry endpoint and
  # service name as job outputs, writes the mTLS certificate files, obtains a
  # traceparent, and opens the root span that downstream jobs attach to.
  runs-on: ubuntu-latest
  outputs:
    start_time: ${{ steps.timestamp.outputs.START_TIME }}
    traceparent: ${{ steps.telemetry-setup.outputs.traceparent }}
    endpoint: ${{ steps.var-reexports.outputs.endpoint }}
    top_level_service_name: ${{ steps.var-reexports.outputs.service_name }}
  steps:
    - name: Get starting timestamp
      id: timestamp
      run: |
        echo "START_TIME=$(date +%s.%N)" >> "${GITHUB_OUTPUT}"
    - name: Echo endpoint to make it available to shared workflows
      id: var-reexports
      run: |
        echo "endpoint=${OTEL_EXPORTER_OTLP_ENDPOINT}" >> "${GITHUB_OUTPUT}"
        echo "service_name=${OTEL_SERVICE_NAME}" >> "${GITHUB_OUTPUT}"
    - name: Write certificate files for mTLS
      # The secret contents reach the script through environment variables
      # rather than being pasted into the script text, so the shell never
      # expands any `$` or backtick they might contain.
      env:
        CA_CERTIFICATE_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
        CLIENT_CERTIFICATE_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
        CLIENT_KEY_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
      run: |
        mkdir -p /tmp/certs
        printf '%s\n' "${CA_CERTIFICATE_PEM}" > "${OTEL_EXPORTER_OTLP_CERTIFICATE}"
        printf '%s\n' "${CLIENT_CERTIFICATE_PEM}" > "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}"
        printf '%s\n' "${CLIENT_KEY_PEM}" > "${OTEL_EXPORTER_OTLP_CLIENT_KEY}"
    - name: Telemetry setup
      id: telemetry-setup
      uses: rapidsai/shared-actions/telemetry-traceparent@add-telemetry
    - name: Start root span
      uses: rapidsai/shared-actions/telemetry-create-span@add-telemetry
      with:
        name: "root span"
        traceparent: ${{ steps.telemetry-setup.outputs.traceparent }}
        # Same key casing as the value written by the timestamp step.
        start_time: ${{ steps.timestamp.outputs.START_TIME }}
pr-builder:
  # Aggregation job: depends on every other job listed here and hands their
  # results (the `needs` context serialized as JSON) to the shared pr-builder
  # workflow. `always()` makes it run regardless of upstream outcomes.
  if: always()
  uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@add-telemetry
  secrets: inherit
  with:
    needs: ${{ toJSON(needs) }}
  needs:
    - changed-files
    - checks
    - conda-cpp-build
    - conda-cpp-tests
    - conda-cpp-checks
    - conda-notebook-tests
    - conda-python-build
    - conda-python-tests
    - docs-build
    - telemetry-setup
    - wheel-build-pylibcugraph
    - wheel-tests-pylibcugraph
    - wheel-build-cugraph
    - wheel-tests-cugraph
    - wheel-build-nx-cugraph
    - wheel-tests-nx-cugraph
    - wheel-build-cugraph-dgl
    - wheel-tests-cugraph-dgl
    - wheel-build-cugraph-pyg
    - wheel-tests-cugraph-pyg
    - wheel-build-cugraph-equivariant
    - wheel-tests-cugraph-equivariant
    - devcontainer
changed-files:
  # Passes three named groups of path patterns to the shared changed-files
  # workflow. The test jobs in this file gate on
  # `needs.changed-files.outputs.changed_file_groups.<group>`.
  uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@add-telemetry
  secrets: inherit
  with:
    # Each group starts from '**' and then lists '!'-prefixed patterns.
    files_yaml: |
      test_cpp:
        - '**'
        - '!.devcontainers/**'
        - '!CONTRIBUTING.md'
        - '!README.md'
        - '!docs/**'
        - '!img/**'
        - '!mg_utils/**'
        - '!notebooks/**'
        - '!python/**'
        - '!readme_pages/**'
      test_notebooks:
        - '**'
        - '!.devcontainers/**'
        - '!CONTRIBUTING.md'
        - '!README.md'
        - '!docs/**'
      test_python:
        - '**'
        - '!.devcontainers/**'
        - '!CONTRIBUTING.md'
        - '!README.md'
        - '!docs/**'
        - '!img/**'
        - '!notebooks/**'
checks:
  # Calls the shared checks workflow once telemetry-setup has finished, and
  # forwards that job's endpoint and traceparent outputs.
  needs: [telemetry-setup]
  uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@add-telemetry
  secrets: inherit
  with:
    enable_check_generated_files: false
    ignored_pr_jobs: 'final_span_update'
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-cpp-build:
  # Conda C++ build via the shared workflow; starts after `checks` and
  # `telemetry-setup` and forwards the telemetry outputs.
  needs: [checks, telemetry-setup]
  uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    node_type: cpu32
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-cpp-tests:
  # Conda C++ tests; gated on the `test_cpp` entry of the changed-files
  # job's `changed_file_groups` output.
  needs:
    - conda-cpp-build
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
  uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-cpp-checks:
  # Post-build checks on the conda C++ build, with symbol checking enabled.
  needs: [conda-cpp-build, telemetry-setup]
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@add-telemetry
  with:
    build_type: pull-request
    enable_check_symbols: true
    # Quoted because the pattern contains `|`, parentheses and spaces.
    symbol_exclusions: "(cugraph::ops|hornet|void writeEdgeCountsKernel|void markUniqueOffsetsKernel)"
    # NOTE(review): CI reported "Invalid workflow file" against this part of
    # the job. Possibly the called workflow at @add-telemetry does not declare
    # the two telemetry inputs below -- confirm against
    # conda-cpp-post-build-checks.yaml before merging.
    default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-python-build:
  # Conda Python build; runs after the conda C++ build and forwards the
  # telemetry outputs.
  needs:
    - conda-cpp-build
    - telemetry-setup
  uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-python-tests:
  # Conda Python tests; gated on the `test_python` entry of the
  # changed-files job's `changed_file_groups` output.
  needs:
    - conda-python-build
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
conda-notebook-tests:
  # Runs ci/test_notebooks.sh through the shared custom-job workflow; gated
  # on the `test_notebooks` entry of `changed_file_groups`.
  needs:
    - conda-python-build
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
  uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    node_type: 'gpu-v100-latest-1'
    arch: 'amd64'
    container_image: 'rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10'
    run_script: 'ci/test_notebooks.sh'
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
docs-build:
  # Runs ci/build_docs.sh through the shared custom-job workflow after the
  # conda Python build.
  needs:
    - conda-python-build
    - telemetry-setup
  uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    node_type: 'gpu-v100-latest-1'
    arch: 'amd64'
    container_image: 'rapidsai/ci-conda:cuda11.8.0-ubuntu22.04-py3.10'
    run_script: 'ci/build_docs.sh'
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-pylibcugraph:
  # Builds the pylibcugraph wheel via ci/build_wheel_pylibcugraph.sh, with
  # rapidsai/cugraph-ops supplied as the extra repository.
  needs:
    - checks
    - telemetry-setup
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/build_wheel_pylibcugraph.sh
    node_type: cpu32
    extra-repo: rapidsai/cugraph-ops
    extra-repo-sha: branch-24.12
    extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-pylibcugraph:
  # Tests the pylibcugraph wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs:
    - wheel-build-pylibcugraph
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/test_wheel_pylibcugraph.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-cugraph:
  # Builds the cugraph wheel after the pylibcugraph wheel tests, with
  # rapidsai/cugraph-ops supplied as the extra repository.
  needs:
    - wheel-tests-pylibcugraph
    - telemetry-setup
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/build_wheel_cugraph.sh
    extra-repo: rapidsai/cugraph-ops
    extra-repo-sha: branch-24.12
    extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-cugraph:
  # Tests the cugraph wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs:
    - wheel-build-cugraph
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/test_wheel_cugraph.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-nx-cugraph:
  # Builds the nx-cugraph wheel after the pylibcugraph wheel tests.
  needs: [telemetry-setup, wheel-tests-pylibcugraph]
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/build_wheel_nx-cugraph.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-nx-cugraph:
  # Tests the nx-cugraph wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs:
    - wheel-build-nx-cugraph
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/test_wheel_nx-cugraph.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-cugraph-dgl:
  # Builds the cugraph-dgl wheel after the cugraph wheel tests.
  needs: [wheel-tests-cugraph, telemetry-setup]
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  with:
    build_type: pull-request
    script: ci/build_wheel_cugraph-dgl.sh
    # Forward the telemetry outputs, as every other wheels-build caller in
    # this workflow does; this job already depends on telemetry-setup.
    default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-cugraph-dgl:
  # Tests the cugraph-dgl wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs: [wheel-build-cugraph-dgl, changed-files, telemetry-setup]
  secrets: inherit
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  with:
    build_type: pull-request
    script: ci/test_wheel_cugraph-dgl.sh
    # jq-style filter selecting only the amd64 matrix entries.
    matrix_filter: map(select(.ARCH == "amd64"))
    # Forward the telemetry outputs, as every other wheels-test caller in
    # this workflow does; this job already depends on telemetry-setup.
    default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-cugraph-pyg:
  # Builds the cugraph-pyg wheel after the cugraph wheel tests.
  needs:
    - wheel-tests-cugraph
    - telemetry-setup
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/build_wheel_cugraph-pyg.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-cugraph-pyg:
  # Tests the cugraph-pyg wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs:
    - wheel-build-cugraph-pyg
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/test_wheel_cugraph-pyg.sh
    matrix_filter: map(select(.ARCH == "amd64"))
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-build-cugraph-equivariant:
  # Builds the cugraph-equivariant wheel; depends only on telemetry-setup.
  needs: [telemetry-setup]
  uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/build_wheel_cugraph-equivariant.sh
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
wheel-tests-cugraph-equivariant:
  # Tests the cugraph-equivariant wheel; gated on the `test_python` entry of
  # `changed_file_groups`.
  needs:
    - wheel-build-cugraph-equivariant
    - changed-files
    - telemetry-setup
  if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
  uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@add-telemetry
  secrets: inherit
  with:
    build_type: pull-request
    script: ci/test_wheel_cugraph-equivariant.sh
    matrix_filter: map(select(.ARCH == "amd64"))
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
devcontainer:
  # Devcontainer build via the shared build-in-devcontainer workflow. The
  # build command resets sccache statistics, runs build-all, then prints them.
  needs: [telemetry-setup]
  uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@add-telemetry
  secrets: inherit
  with:
    # `arch` and `cuda` are passed as strings that contain JSON arrays.
    arch: '["amd64"]'
    cuda: '["12.5"]'
    node_type: cpu32
    extra-repo-deploy-key: CUGRAPH_OPS_SSH_PRIVATE_DEPLOY_KEY
    default_endpoint: '${{needs.telemetry-setup.outputs.endpoint}}'
    traceparent: ${{ needs.telemetry-setup.outputs.traceparent }}
    build_command: |
      sccache -z;
      build-all --verbose -j$(nproc --ignore=1) -DBUILD_CUGRAPH_MG_TESTS=ON;
      sccache -s;
final_span_update:
  # Closes out the root span with the workflow's final completion time.
  runs-on: ubuntu-latest
  needs: [pr-builder, telemetry-setup]
  # Without a job-level condition this job is skipped whenever pr-builder
  # fails, so failed runs would never get their root span updated and the
  # step-level `if: always()` below would never be reached.
  if: always()
  steps:
    - name: Get final timestamp
      id: timestamp
      run: |
        echo "FINAL_TIME=$(date +%s.%N)" >> "${GITHUB_OUTPUT}"
    # Main purpose of this traceparent step is to ensure that otel-cli is installed.
    - name: Get job traceparent
      uses: rapidsai/shared-actions/telemetry-traceparent@add-telemetry
    - name: Write certificate files for mTLS
      # The secret contents reach the script through environment variables
      # rather than being pasted into the script text, so the shell never
      # expands any `$` or backtick they might contain.
      env:
        CA_CERTIFICATE_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }}
        CLIENT_CERTIFICATE_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }}
        CLIENT_KEY_PEM: ${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}
      run: |
        mkdir -p /tmp/certs
        printf '%s\n' "${CA_CERTIFICATE_PEM}" > "${OTEL_EXPORTER_OTLP_CERTIFICATE}"
        printf '%s\n' "${CLIENT_CERTIFICATE_PEM}" > "${OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE}"
        printf '%s\n' "${CLIENT_KEY_PEM}" > "${OTEL_EXPORTER_OTLP_CLIENT_KEY}"
    - name: Update root span with final completion time
      if: always()
      uses: rapidsai/shared-actions/telemetry-create-span@add-telemetry
      with:
        service: ${{needs.telemetry-setup.outputs.top_level_service_name}}
        name: "end-of-job update"
        default_endpoint: "${{needs.telemetry-setup.outputs.endpoint}}"
        traceparent: ${{needs.telemetry-setup.outputs.traceparent}}
        start_time: ${{needs.telemetry-setup.outputs.start_time}}
        end_time: ${{steps.timestamp.outputs.FINAL_TIME}}