Adding cuda kernel (optimized for sm80) for block-wise 4b quantized float 16 GEMM. #24407
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Linux_CI | |
on: | |
push: | |
branches: | |
- main | |
- rel-* | |
pull_request: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
cancel-in-progress: true | |
jobs: | |
Onnxruntime-TVM: | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: true | |
- uses: actions/setup-python@v5 | |
with: | |
python-version: '3.8.x' | |
architecture: 'x64' | |
- name: 'Setup TVM EP requirements' | |
run: | | |
set -e -x | |
sudo apt-get update | |
sudo apt-get install -y libtinfo-dev zlib1g-dev build-essential libedit-dev libxml2-dev nasm | |
python3 -m pip install -r ${{ github.workspace }}/tools/ci_build/github/linux/tvm/requirements.txt | |
- name: 'Build and Test' | |
run: | | |
python3 ${{ github.workspace }}/tools/ci_build/build.py --build_dir build --config Release --skip_submodule_sync --parallel --enable_pybind --disable_contrib_ops --disable_ml_ops --skip_onnx_tests --use_tvm --use_tvm_hash --ctest_path "" |