From 0a143f0df32c5189974d46bdb6eaa733cd9cb287 Mon Sep 17 00:00:00 2001 From: sjh Date: Mon, 21 Oct 2024 10:21:16 +0800 Subject: [PATCH 1/8] add requirements --- .github/workflows/deepspeed.yaml | 36 +++++++++---------------- README.md | 15 +++++++++++ requirements/requirements_deepspeed.txt | 28 +++++++++++++++++++ 3 files changed, 55 insertions(+), 24 deletions(-) create mode 100644 requirements/requirements_deepspeed.txt diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index d423ad708..80b20f381 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -7,13 +7,13 @@ on: workflow_dispatch: pull_request: paths: - - '.github/workflows/deepspeed.yml' + - '.github/workflows/deepspeed.yaml' - 'requirements/**' schedule: - cron: "0 0 * * *" push: paths: - - '.github/workflows/deepspeed.yml' + - '.github/workflows/deepspeed.yaml' concurrency: @@ -39,21 +39,13 @@ jobs: - /etc/ascend_install.info:/etc/ascend_install.info options: --network host --name deepspeed_unit-tests - --device /dev/davinci5 + --device /dev/davinci4 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc --shm-size "20g" --entrypoint /bin/bash - - env: - PT_HPU_LAZY_MODE: 0 - TORCHINDUCTOR_COMPILE_THREADS: 1 - TEST_LIST: | - test_accelerator.py - test_autotuning.py - test_compression.py - + steps: - uses: actions/checkout@v4 @@ -61,10 +53,12 @@ jobs: run: | npu-smi info apt-get update + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple source /root/.bashrc - pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install torch==2.2.0 torchvision==0.17.0 torch_npu==2.2.0 torchaudio==2.2.0 numpy==1.26.4 cloudpickle tornado ml-dtypes + python --version python << EOF if __name__ == '__main__': import torch @@ -74,23 +68,17 @@ jobs: print(f"Device Count: {torch.npu.device_count()}") print(f"Device 
Available: {torch.npu.is_available()}") EOF - - # - name: Install transformers - # run: | - # source /root/.bashrc - # echo "y" | apt-get install git - # git clone https://github.com/huggingface/transformers - # cd transformers - # git rev-parse --short HEAD - # pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple - + - name: Install deepspeed run: | source /root/.bashrc echo "y" | apt-get install git git clone --depth=1 https://github.com/microsoft/DeepSpeed.git + pip install -r requirements/requirements_deepspeed.txt cd DeepSpeed - pip install .[dev,autotuning] -i https://pypi.tuna.tsinghua.edu.cn/simple + pip install . + + print("run ds_report...") ds_report - name: Python environment diff --git a/README.md b/README.md index 93ccb298e..780ec34e5 100644 --- a/README.md +++ b/README.md @@ -44,5 +44,20 @@ This [PR](https://github.com/microsoft/onnxruntime/pull/15833) refactored the Ex Update on 2023.06.08 This [PR](https://github.com/microsoft/onnxruntime/pull/14731) introduced a missing registration of CANN Identity operator for version greater than 14. It has been fixed in this [PR](https://github.com/microsoft/onnxruntime/pull/16210). +## Deepspeed Ascend CI +The DeepSpeed source code is taken from the `main` branch of `microsoft/DeepSpeed` and is run and tested daily on Ascend hardware. 
+ +------------------------------------------------------------ + +| Key | Value | +| :---: | :---: | +| CPU | Aarch64 | +| NPU | Ascend910B | +| OS | Ubuntu | +| Period | UTC 1200 daily | +| Branch | main | +| Status | ![Deepspeed](https://github.com/Ascend/Ascend-CI/actions/workflows/deepspeed.yaml/badge.svg) | +| Recheck By Hand | comment 'recheck' in any issue | + ## Pytorch Ascend CI TBD diff --git a/requirements/requirements_deepspeed.txt b/requirements/requirements_deepspeed.txt new file mode 100644 index 000000000..66cc83714 --- /dev/null +++ b/requirements/requirements_deepspeed.txt @@ -0,0 +1,28 @@ +accelerate +clang-format==18.1.3 +comet_ml>=3.41.0 +docutils<0.18 +future +importlib-metadata>=4 +mup +pre-commit>=2.20.0 +pytest>=7.2.0 +pytest-forked +pytest-randomly +pytest-xdist +qtorch==0.3.0 +recommonmark +sphinx +sphinx-rtd-theme +tensorboard +torchvision +transformers>=4.39.0 +wandb +google +lm-eval==0.3.0 +protobuf +qtorch +safetensors +sentencepiece +transformers>=4.32.1 +tabulate \ No newline at end of file From 6da0b14034c1aa12dbcb7aaf6872b38f0446cd7d Mon Sep 17 00:00:00 2001 From: jiahao su Date: Mon, 21 Oct 2024 16:27:41 +0800 Subject: [PATCH 2/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index 80b20f381..ec64f3d90 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -77,8 +77,6 @@ jobs: pip install -r requirements/requirements_deepspeed.txt cd DeepSpeed pip install . 
- - print("run ds_report...") ds_report - name: Python environment From 5f31e031b722e73ccb96c2a5d221c300f55cbef2 Mon Sep 17 00:00:00 2001 From: jiahao su Date: Mon, 21 Oct 2024 16:56:19 +0800 Subject: [PATCH 3/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index ec64f3d90..b87f674ab 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -74,9 +74,9 @@ jobs: source /root/.bashrc echo "y" | apt-get install git git clone --depth=1 https://github.com/microsoft/DeepSpeed.git - pip install -r requirements/requirements_deepspeed.txt cd DeepSpeed pip install . + pip install -r ../requirements/requirements_deepspeed.txt ds_report - name: Python environment From 1617f8b34459cfa6ad96427dbdc3d3c7e0a3186b Mon Sep 17 00:00:00 2001 From: jiahao su Date: Mon, 21 Oct 2024 19:05:26 +0800 Subject: [PATCH 4/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index b87f674ab..341c9332c 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -68,6 +68,15 @@ jobs: print(f"Device Count: {torch.npu.device_count()}") print(f"Device Available: {torch.npu.is_available()}") EOF + + - name: Install transformers + run: | + source /root/.bashrc + echo "y" | apt-get install git + git clone https://github.com/huggingface/transformers + cd transformers + git rev-parse --short HEAD + pip install . 
-i https://pypi.tuna.tsinghua.edu.cn/simple - name: Install deepspeed run: | From 6593f878c1fbc0b720f77af4df3ead7dea6d392e Mon Sep 17 00:00:00 2001 From: sjh Date: Tue, 22 Oct 2024 09:40:45 +0800 Subject: [PATCH 5/8] modify requirements_deepspeed.txt --- .github/workflows/deepspeed.yaml | 2 +- requirements/requirements_deepspeed.txt | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index 341c9332c..78fcc08e6 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -84,7 +84,7 @@ jobs: echo "y" | apt-get install git git clone --depth=1 https://github.com/microsoft/DeepSpeed.git cd DeepSpeed - pip install . + pip install .[1bit,autotuning,inf] pip install -r ../requirements/requirements_deepspeed.txt ds_report diff --git a/requirements/requirements_deepspeed.txt b/requirements/requirements_deepspeed.txt index 66cc83714..cb6b0759a 100644 --- a/requirements/requirements_deepspeed.txt +++ b/requirements/requirements_deepspeed.txt @@ -1,6 +1,7 @@ accelerate clang-format==18.1.3 comet_ml>=3.41.0 +# deepspeed-kernels ; sys_platform == 'linux' docutils<0.18 future importlib-metadata>=4 @@ -18,11 +19,11 @@ tensorboard torchvision transformers>=4.39.0 wandb -google -lm-eval==0.3.0 -protobuf -qtorch -safetensors -sentencepiece -transformers>=4.32.1 -tabulate \ No newline at end of file +# google +# lm-eval==0.3.0 +# protobuf +# qtorch +# safetensors +# sentencepiece +# transformers>=4.32.1 +# tabulate \ No newline at end of file From 1c81cd93bf43f2ecfcfb6387afc1e833bf6833d6 Mon Sep 17 00:00:00 2001 From: jiahao su Date: Tue, 22 Oct 2024 10:34:13 +0800 Subject: [PATCH 6/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index 78fcc08e6..8bbe4bd66 100644 --- 
a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -69,14 +69,14 @@ jobs: print(f"Device Available: {torch.npu.is_available()}") EOF - - name: Install transformers - run: | - source /root/.bashrc - echo "y" | apt-get install git - git clone https://github.com/huggingface/transformers - cd transformers - git rev-parse --short HEAD - pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple + # - name: Install transformers + # run: | + # source /root/.bashrc + # echo "y" | apt-get install git + # git clone https://github.com/huggingface/transformers + # cd transformers + # git rev-parse --short HEAD + # pip install . - name: Install deepspeed run: | @@ -84,8 +84,8 @@ jobs: echo "y" | apt-get install git git clone --depth=1 https://github.com/microsoft/DeepSpeed.git cd DeepSpeed - pip install .[1bit,autotuning,inf] - pip install -r ../requirements/requirements_deepspeed.txt + pip install -r ../requirements/requirements_deepspeed.txt + pip install .[1bit,autotuning,inf] ds_report - name: Python environment From 698a97b425d46de93102f7861adda3a01a99316a Mon Sep 17 00:00:00 2001 From: jiahao su Date: Mon, 28 Oct 2024 10:21:34 +0800 Subject: [PATCH 7/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deepspeed.yaml b/.github/workflows/deepspeed.yaml index 8bbe4bd66..612b9b5c3 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -40,6 +40,7 @@ jobs: options: --network host --name deepspeed_unit-tests --device /dev/davinci4 + --device /dev/davinci6 --device /dev/davinci_manager --device /dev/devmm_svm --device /dev/hisi_hdc From 020885829f2175eff4b4971adc05f599ffbc70af Mon Sep 17 00:00:00 2001 From: jiahao su Date: Mon, 28 Oct 2024 10:24:35 +0800 Subject: [PATCH 8/8] Update deepspeed.yaml --- .github/workflows/deepspeed.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deepspeed.yaml 
b/.github/workflows/deepspeed.yaml index 612b9b5c3..cb0ba0a34 100644 --- a/.github/workflows/deepspeed.yaml +++ b/.github/workflows/deepspeed.yaml @@ -39,7 +39,6 @@ jobs: - /etc/ascend_install.info:/etc/ascend_install.info options: --network host --name deepspeed_unit-tests - --device /dev/davinci4 --device /dev/davinci6 --device /dev/davinci_manager --device /dev/devmm_svm