Skip to content

[triton-ci] create initial inductor workflow #1

[triton-ci] create initial inductor workflow

[triton-ci] create initial inductor workflow #1

Workflow file for this run

name: inductor
on:
pull_request:
paths:
- '**.yml'
workflow_dispatch:
inputs:
triton_commit:
description: 'Commit SHA to test Triton at'
required: true
default: 'main'
pytorch_commit:
description: 'Commit SHA to test PyTorch at'
required: true
default: 'main'
schedule:
# run nightly at 00:00 PST + random noise
- cron: '15 7 * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
permissions: read-all
jobs:
# linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
# name: cuda12.1-py3.10-gcc9-sm86
# uses: ./.github/workflows/_linux-build.yml
# with:
# build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
# docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
# cuda-arch-list: '8.6'
# test-matrix: |
# { include: [
# { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" },
# { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "dynamic_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "dynamic_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "dynamic_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "dynamic_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "dynamic_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "aot_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "aot_inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
# ]}
# secrets:
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# linux-focal-cuda12_1-py3_10-gcc9-inductor-test:
# name: cuda12.1-py3.10-gcc9-sm86
# uses: ./.github/workflows/_linux-test.yml
# needs: linux-focal-cuda12_1-py3_10-gcc9-inductor-build
# with:
# build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm86
# docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.docker-image }}
# test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-inductor-build.outputs.test-matrix }}
# secrets:
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# linux-focal-cuda12_1-py3_12-gcc9-inductor-build:
# name: cuda12.1-py3.12-gcc9-sm86
# uses: ./.github/workflows/_linux-build.yml
# with:
# build-environment: linux-focal-cuda12.1-py3.12-gcc9-sm86
# docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks
# cuda-arch-list: '8.6'
# test-matrix: |
# { include: [
# { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# ]}
# linux-focal-cuda12_1-py3_12-gcc9-inductor-test:
# name: cuda12.1-py3.12-gcc9-sm86
# uses: ./.github/workflows/_linux-test.yml
# needs: linux-focal-cuda12_1-py3_12-gcc9-inductor-build
# with:
# build-environment: linux-focal-cuda12.1-py3.12-gcc9-sm86
# docker-image: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.docker-image }}
# test-matrix: ${{ needs.linux-focal-cuda12_1-py3_12-gcc9-inductor-build.outputs.test-matrix }}
# linux-jammy-cpu-py3_12-inductor-halide-build:
# name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
# uses: ./.github/workflows/_linux-build.yml
# with:
# build-environment: linux-jammy-py3.12-gcc11
# docker-image-name: pytorch-linux-jammy-py3.12-halide
# test-matrix: |
# { include: [
# { config: "inductor-halide", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
# ]}
# linux-jammy-cpu-py3_12-inductor-halide-test:
# name: linux-jammy-cpu-py3.12-gcc11-inductor-halide
# uses: ./.github/workflows/_linux-test.yml
# needs: linux-jammy-cpu-py3_12-inductor-halide-build
# with:
# build-environment: linux-jammy-py3.12-gcc11
# docker-image: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.docker-image }}
# test-matrix: ${{ needs.linux-jammy-cpu-py3_12-inductor-halide-build.outputs.test-matrix }}
# linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
# # Should be synced with the one in inductor-periodic.yml but this only runs inductor_timm
# name: cuda12.4-py3.10-gcc9-sm86
# uses: ./.github/workflows/_linux-build.yml
# with:
# sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
# build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
# docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
# cuda-arch-list: '8.6'
# test-matrix: |
# { include: [
# { config: "inductor_timm", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# { config: "inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
# ]}
# secrets:
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# linux-focal-cuda12_4-py3_10-gcc9-inductor-test:
# name: cuda12.4-py3.10-gcc9-sm86
# uses: ./.github/workflows/_linux-test.yml
# needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build
# with:
# sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test
# build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86
# docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }}
# test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }}
# secrets:
# HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
linux-jammy-cpu-py3_8-gcc11-inductor-build:
name: linux-jammy-cpu-py3.8-gcc11-inductor
uses: ./.github/workflows/_linux-build.yml
with:
build-environment: linux-jammy-py3.8-gcc11-build
docker-image-name: pytorch-linux-jammy-py3.8-gcc11-inductor-benchmarks
test-matrix: |
{ include: [
{ config: "cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
{ config: "cpu_inductor_timm", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_timm", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_huggingface_freezing", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
{ config: "cpu_inductor_timm_freezing", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_timm_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_inductor_huggingface_amp_freezing", shard: 1, num_shards: 1, runner: "linux.16xlarge.spr" },
{ config: "cpu_inductor_timm_amp_freezing", shard: 1, num_shards: 2, runner: "linux.16xlarge.spr" },
{ config: "cpu_inductor_timm_amp_freezing", shard: 2, num_shards: 2, runner: "linux.16xlarge.spr" },
{ config: "cpu_inductor_torchbench_amp_freezing", shard: 1, num_shards: 2, runner: "linux.16xlarge.spr" },
{ config: "cpu_inductor_torchbench_amp_freezing", shard: 2, num_shards: 2, runner: "linux.16xlarge.spr" },
{ config: "dynamic_cpu_inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_timm", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_timm", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "dynamic_cpu_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_aot_inductor_huggingface_freezing", shard: 1, num_shards: 1, runner: "linux.12xlarge" },
{ config: "cpu_aot_inductor_timm_freezing", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_aot_inductor_timm_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_aot_inductor_torchbench_freezing", shard: 1, num_shards: 2, runner: "linux.12xlarge" },
{ config: "cpu_aot_inductor_torchbench_freezing", shard: 2, num_shards: 2, runner: "linux.12xlarge" },
{ config: "inductor_torchbench_cpu_smoketest_perf", shard: 1, num_shards: 1, runner: "linux.24xl.spr-metal" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
linux-jammy-cpu-py3_8-gcc11-inductor-test:
name: linux-jammy-cpu-py3.8-gcc11-inductor
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-cpu-py3_8-gcc11-inductor-build
with:
build-environment: linux-jammy-py3.8-gcc11-build
docker-image: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cpu-py3_8-gcc11-inductor-build.outputs.test-matrix }}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}