# (Single-card) Device perf regressions #413
# Workflow: builds tt-metal via scripts/build_scripts/build_with_profiler_opt.sh,
# runs the models device-performance test pipeline on a self-hosted runner, and
# uploads the resulting CSV report as a build artifact.
name: "[post-commit] models - Run device perf regressions on models and output report"

on:
  # Manual runs from the Actions UI.
  workflow_dispatch:
  # Every push to main.
  push:
    branches: ["main"]
  # Daily at 02:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 2 * * *"
  # Allows other workflows to invoke this one as a reusable workflow.
  workflow_call:

jobs:
  build-and-test-measure-device-perf:
    strategy:
      # Do not fail-fast because we need to ensure all tests go to completion
      # so we try not to get hanging machines
      fail-fast: false
      matrix:
        # One entry per target machine; `runs-on` holds the runner labels used
        # to select the machine, `machine-type` is appended to the pipeline
        # type and to the artifact name below.
        runner-info:
          - arch: grayskull
            runs-on: ["perf-no-reset-grayskull", "self-reset"]
            machine-type: "bare_metal"
    env:
      TT_METAL_ENV: ${{ vars.TT_METAL_ENV }}
      ARCH_NAME: ${{ matrix.runner-info.arch }}
    environment: dev
    runs-on: ${{ matrix.runner-info.runs-on }}
    steps:
      # NOTE(review): this action is referenced by the mutable `@main` ref;
      # consider pinning to a tag or commit SHA for reproducibility.
      - uses: tenstorrent-metal/metal-workflows/.github/actions/checkout-with-submodule-lfs@main
        with:
          token: ${{ secrets.CHECKOUT_TOKEN }}

      - name: Ensure weka mount is active
        # Restarts the mount unit, re-runs rc.local, then lists a directory on
        # the mount so the step fails early if the share is not available.
        run: |
          sudo systemctl restart mnt-MLPerf.mount
          sudo /etc/rc.local
          ls -al /mnt/MLPerf/bit_error_tests

      - name: Set up dynamic env vars for build
        # Exports the checkout directory as TT_METAL_HOME for all later steps.
        run: |
          echo "TT_METAL_HOME=$(pwd)" >> "$GITHUB_ENV"

      - name: Build tt-metal and libs
        run: |
          ./scripts/build_scripts/build_with_profiler_opt.sh

      - name: Run device performance regressions
        timeout-minutes: 30
        run: |
          source build/python_env/bin/activate
          ./tests/scripts/run_tests.sh --tt-arch "$ARCH_NAME" --pipeline-type models_device_performance_${{ matrix.runner-info.machine-type }}

      - name: Check device perf report exists
        id: check-device-perf-report
        # NOTE(review): the filename is derived from the date at the time this
        # step runs; presumably the test pipeline names the report using the
        # date it ran, so a run that crosses midnight could fail to find it.
        # Confirm against the report generator.
        run: |
          ls -hal
          DEVICE_PERF_REPORT_FILENAME="Models_Device_Perf_$(date +%Y_%m_%d).csv"
          # Fails the step if the report file is missing.
          ls -hal "$DEVICE_PERF_REPORT_FILENAME"
          echo "device_perf_report_filename=$DEVICE_PERF_REPORT_FILENAME" >> "$GITHUB_OUTPUT"

      - name: Upload device perf report
        # upload-artifact v3 is deprecated; v4 requires artifact names to be
        # unique within a run, which the arch/machine-type suffix provides.
        uses: actions/upload-artifact@v4
        with:
          name: device-perf-report-csv-${{ matrix.runner-info.arch }}-${{ matrix.runner-info.machine-type }}
          path: "${{ steps.check-device-perf-report.outputs.device_perf_report_filename }}"
          # Fail instead of only warning if the report path matches nothing.
          if-no-files-found: error