Merge branch 'master' into new-fix

microsoft · Dec 13, 2023 · 3743929 · 3743929
2 parents 51aa22b + a7900bc
commit 3743929
Show file tree

Hide file tree

Showing 217 changed files with 6,867 additions and 1,195 deletions.
diff --git a/.github/workflows/nv-a6000.yml b/.github/workflows/nv-a6000.yml
@@ -2,9 +2,10 @@ name: nv-a6000
 
 on:
   pull_request:
-    paths-ignore:
-      - 'docs/**'
-      - 'blogs/**'
+    paths:
+      - "deepspeed/inference/v2/**"
+      - "tests/unit/inference/v2/**"
+      - ".github/workflows/nv-a6000.yml"
   workflow_dispatch:
 
 concurrency:
@@ -36,7 +37,7 @@ jobs:
           python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
       - name: Install transformers
         run: |
-          git clone https://github.com/huggingface/transformers
+          git clone --depth=1 https://github.com/huggingface/transformers
           cd transformers
           git rev-parse --short HEAD
           python -m pip install .
@@ -54,3 +55,10 @@ jobs:
           cd tests
           python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2' unit/ --torch_ver="2.0" --cuda_ver="12"
           python -m pytest --color=yes --durations=0 --verbose -rF -m 'inference_v2_ops' unit/ --torch_ver="2.0" --cuda_ver="12"
+      - name: MII unit tests
+        run: |
+          git clone --depth=1 https://github.com/microsoft/DeepSpeed-MII.git
+          cd DeepSpeed-MII
+          pip install .[dev]
+          cd tests
+          python -m pytest --color=yes --durations=0 --verbose -rF ./
diff --git a/.github/workflows/nv-accelerate-v100.yml b/.github/workflows/nv-accelerate-v100.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-ds-chat.yml b/.github/workflows/nv-ds-chat.yml
@@ -36,11 +36,7 @@ jobs:
           pip install .[dev]
           ds_report
 
-      - name: Python environment
-        run: |
-          pip list
-
-      - name: DS-Chat unit tests
+      - name: Install deepspeed-chat
         run: |
           BRANCH="master"
           if [[ ! -z "${{ github.event.inputs.dse_branch }}" ]]; then
@@ -50,8 +46,17 @@ jobs:
           git clone -b $BRANCH https://github.com/microsoft/DeepSpeedExamples.git
           cd DeepSpeedExamples/applications/DeepSpeed-Chat
           pip install -r requirements.txt
+          pip install -e .
+
+      - name: Python environment
+        run: |
+          pip list
+
+      - name: DS-Chat unit tests
+        run: |
+          cd DeepSpeedExamples/applications/DeepSpeed-Chat
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
-          cd training/tests
+          cd tests
           pytest $PYTEST_OPTS ./
 
       - name: Open GitHub issue if nightly CI fails

diff --git a/.github/workflows/nv-inference.yml b/.github/workflows/nv-inference.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-lightning-v100.yml b/.github/workflows/nv-lightning-v100.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-megatron.yml b/.github/workflows/nv-megatron.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-mii.yml b/.github/workflows/nv-mii.yml
@@ -54,5 +54,5 @@ jobs:
           cd DeepSpeed-MII
           pip install .[dev]
           unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
-          cd tests
+          cd tests/legacy
           pytest $PYTEST_OPTS --forked -m "deepspeed" ./
diff --git a/.github/workflows/nv-pre-compile-ops.yml b/.github/workflows/nv-pre-compile-ops.yml
@@ -7,6 +7,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-sd.yml b/.github/workflows/nv-sd.yml
@@ -0,0 +1,71 @@
+name: nv-sd  # Stable Diffusion unit tests on the self-hosted A6000 runner
+
+on:
+  schedule:
+    - cron: "0 0 * * 0"  # weekly: Sundays at 00:00 UTC
+  workflow_dispatch:
+  pull_request:
+    paths:  # PRs trigger this only when SD code, its test, or this workflow changes
+      - "deepspeed/ops/transformer/inference/diffusers_**"
+      - "tests/unit/inference/test_stable_diffusion.py"
+      - "deepspeed/model_implementations/diffusers/unet.py"
+      - "deepspeed/model_implementations/diffusers/vae.py"
+      - ".github/workflows/nv-sd.yml"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true  # a newer run for the same workflow/ref cancels the older one
+
+permissions:
+  contents: read
+  issues: write  # the final step opens/updates an issue with GITHUB_TOKEN
+
+jobs:
+  sd-tests:
+    runs-on: [self-hosted, nvidia, a6000]
+    container:
+      image: nvcr.io/nvidia/pytorch:23.03-py3
+      ports:
+        - 80
+      options: --gpus all --shm-size "8G"
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Check container state
+        run: |
+          ldd --version
+          nvcc --version
+          nvidia-smi
+          python -c "import torch; print('torch:', torch.__version__, torch)"
+          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"
+      - name: Install transformers
+        run: |
+          git clone --depth=1 https://github.com/huggingface/transformers # shallow: only HEAD is installed
+          cd transformers
+          git rev-parse --short HEAD
+          python -m pip install .
+      - name: Install deepspeed
+        run: |
+          python -m pip install image-similarity-measures
+          python -m pip install opencv-python==4.6.* --force-reinstall
+          python -m pip install docutils==0.18.1 jinja2==3.0 urllib3==1.26.11 ninja
+          python -m pip install .[dev,1bit,autotuning,sd]
+          ds_report
+      - name: Python environment
+        run: |
+          python -m pip list
+      - name: Unit tests
+        run: |
+          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
+          cd tests
+          python -m pytest --color=yes --durations=0 --verbose -rF -m 'stable_diffusion' -k "TestStableDiffusion" unit/ --torch_ver="2.0" --cuda_ver="12"
+
+      - name: Open GitHub issue if weekly CI fails
+        if: ${{ failure() && (github.event_name == 'schedule') }}
+        uses: JasonEtco/create-an-issue@v2
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        with:
+          filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
+          update_existing: true
diff --git a/.github/workflows/nv-torch-latest-cpu.yml b/.github/workflows/nv-torch-latest-cpu.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-torch-latest-v100.yml b/.github/workflows/nv-torch-latest-v100.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.github/workflows/nv-transformers-v100.yml b/.github/workflows/nv-transformers-v100.yml
@@ -5,6 +5,8 @@ on:
     paths-ignore:
       - 'docs/**'
       - 'blogs/**'
+      - 'deepspeed/inference/v2/**'
+      - "tests/unit/inference/v2/**"
   merge_group:
     branches: [ master ]
   schedule:

diff --git a/.gitignore b/.gitignore
@@ -1,31 +1,55 @@
+## Ignore Python compiled files
 *.pyc
+
+## Ignore IDE-specific files and directories
+# JetBrains IDE settings
 .idea/
+# Visual Studio Code settings
+.vscode/
+# Theia IDE settings
+.theia/
+
+## Ignore temporary and backup files
+# General backup files
 *~
+# Vim swap files
 *.swp
+
+## Ignore log files
 *.log
+
+## Ignore the generated git version info file
 deepspeed/git_version_info_installed.py
+
+## Ignore Python bytecode cache
 __pycache__
 
-# Build + installation data
+## Build + installation data
+# Build artifacts
 build/
+# Distribution files
 dist/
+# Compiled shared objects
 *.so
+# DeepSpeed package info
 deepspeed.egg-info/
+# Build information
 build.txt
 
-# Website
+## Website generated files
+# Jekyll generated site
 docs/_site/
+# Generated documentation
 docs/build
 docs/code-docs/source/_build
 docs/code-docs/_build
 docs/code-docs/build
+# SASS cache
 .sass-cache/
+# Jekyll cache
 .jekyll-cache/
 .jekyll-metadata
 
-# Testing data
+## Testing data
+# Saved checkpoints for testing
 tests/unit/saved_checkpoint/
-
-# Dev/IDE data
-.vscode
-.theia
diff --git a/README.md b/README.md
@@ -15,8 +15,9 @@
 ## Latest News
 <b> <span style="color:orange" > DeepSpeed empowers ChatGPT-like model training with a single click, offering 15x speedup over SOTA RLHF systems with unprecedented cost reduction at all scales; [learn how](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-chat)</span>.</b>
 
+* [2023/11] [Llama 2 Inference on 4th Gen Intel® Xeon® Scalable Processor with DeepSpeed](https://github.com/microsoft/DeepSpeed/tree/master/blogs/intel-inference) [[Intel version]](https://www.intel.com/content/www/us/en/developer/articles/technical/xllama-2-on-xeon-scalable-processor-with-deepspeed.html)
 * [2023/11] [DeepSpeed ZeRO-Offload++: 6x Higher Training Throughput via Collaborative CPU/GPU Twin-Flow](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-offloadpp)
-* [2023/11] [DeepSpeed-FastGen: High-throughput Text Generation for LLMs via MII and DeepSpeed-Inference](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen) [[English](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen)] [[中文](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen/chinese/README.md)]
+* [2023/11] [DeepSpeed-FastGen: High-throughput Text Generation for LLMs via MII and DeepSpeed-Inference](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen) [[English](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen)] [[中文](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen/chinese/README.md)] [[日本語](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-fastgen/japanese/README.md)]
 * [2023/10] [DeepSpeed-VisualChat: Improve Your Chat Experience with Multi-Round Multi-Image Inputs](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-visualchat/10-03-2023/README.md) [[English](https://github.com/microsoft/DeepSpeed/tree/master/blogs/deepspeed-visualchat/10-03-2023/README.md)] [[中文](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-visualchat/10-03-2023/README-Chinese.md)] [[日本語](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed-visualchat/10-03-2023/README-Japanese.md)]
 * [2023/09] Announcing the DeepSpeed4Science Initiative: Enabling large-scale scientific discovery through sophisticated AI system technologies [[DeepSpeed4Science website](https://deepspeed4science.ai/)] [[Tutorials](https://www.deepspeed.ai/deepspeed4science/)] [[White paper](https://arxiv.org/abs/2310.04610)] [[Blog](https://www.microsoft.com/en-us/research/blog/announcing-the-deepspeed4science-initiative-enabling-large-scale-scientific-discovery-through-sophisticated-ai-system-technologies/)] [[中文](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed4science/chinese/README.md)] [[日本語](https://github.com/microsoft/DeepSpeed/blob/master/blogs/deepspeed4science/japanese/README.md)]
 * [2023/08] [DeepSpeed ZeRO-Inference: 20x faster inference through weight quantization and KV cache offloading](https://github.com/microsoft/DeepSpeedExamples/blob/master/inference/huggingface/zero_inference/README.md)
@@ -128,7 +129,7 @@ DeepSpeed has been integrated with several different popular open-source DL fram
 | AMD | [![amd-mi100](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi100.yml) [![amd-mi200](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi200.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/amd-mi200.yml) |
 | CPU | [![nv-torch-latest-cpu](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-cpu.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-latest-cpu.yml) |
 | PyTorch Nightly | [![nv-torch-nightly-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-nightly-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-torch-nightly-v100.yml) |
-| Integrations | [![nv-transformers-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml) [![nv-lightning-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml) [![nv-accelerate-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml) [![nv-megatron](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml) [![nv-mii](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml) [![nv-ds-chat](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-ds-chat.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-ds-chat.yml) |
+| Integrations | [![nv-transformers-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-transformers-v100.yml) [![nv-lightning-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-lightning-v100.yml) [![nv-accelerate-v100](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-accelerate-v100.yml) [![nv-megatron](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-megatron.yml) [![nv-mii](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-mii.yml) [![nv-ds-chat](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-ds-chat.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-ds-chat.yml) [![nv-sd](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-sd.yml/badge.svg)](https://github.com/microsoft/DeepSpeed/actions/workflows/nv-sd.yml) |
 | Misc | [![Formatting](https://github.com/microsoft/DeepSpeed/actions/workflows/formatting.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/formatting.yml) [![pages-build-deployment](https://github.com/microsoft/DeepSpeed/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/microsoft/DeepSpeed/actions/workflows/pages/pages-build-deployment) [![Documentation Status](https://readthedocs.org/projects/deepspeed/badge/?version=latest)](https://deepspeed.readthedocs.io/en/latest/?badge=latest)[![python](https://github.com/microsoft/DeepSpeed/actions/workflows/python.yml/badge.svg?branch=master)](https://github.com/microsoft/DeepSpeed/actions/workflows/python.yml) |
 
 # Installation

diff --git a/accelerator/__init__.py b/accelerator/__init__.py
@@ -4,4 +4,4 @@
 # DeepSpeed Team
 
 from .abstract_accelerator import DeepSpeedAccelerator
-from .real_accelerator import get_accelerator, set_accelerator
+from .real_accelerator import get_accelerator, set_accelerator, is_current_accelerator_supported
diff --git a/accelerator/cpu_accelerator.py b/accelerator/cpu_accelerator.py
@@ -63,7 +63,7 @@ def random(self):
         return torch.random
 
     def set_rng_state(self, new_state, device_index=None):
-        if device_index == None:
+        if device_index is None:
             return torch.set_rng_state(new_state)
         return torch.set_rng_state(new_state, device_index)
 
@@ -253,7 +253,7 @@ def on_accelerator(self, tensor):
     # create an instance of op builder and return, name specified by class_name
     def create_op_builder(self, op_name):
         builder_class = self.get_op_builder(op_name)
-        if builder_class != None:
+        if builder_class is not None:
             return builder_class()
         return None
 

diff --git a/accelerator/cuda_accelerator.py b/accelerator/cuda_accelerator.py
@@ -44,7 +44,7 @@ def is_synchronized_device(self):
 
     # Device APIs
     def device_name(self, device_index=None):
-        if device_index == None:
+        if device_index is None:
             return 'cuda'
         return 'cuda:{}'.format(device_index)
 
@@ -280,7 +280,7 @@ def op_builder_dir(self):
     class_dict = None
 
     def _lazy_init_class_dict(self):
-        if self.class_dict != None:
+        if self.class_dict is not None:
             return
         else:
             self.class_dict = {}

diff --git a/accelerator/mps_accelerator.py b/accelerator/mps_accelerator.py
@@ -26,7 +26,7 @@ def is_synchronized_device(self):
 
     # Device APIs
     def device_name(self, device_index=None):
-        if device_index == None:
+        if device_index is None:
             return "mps"
         return "mps:{}".format(device_index)
 
@@ -221,7 +221,7 @@ def op_builder_dir(self):
     # create an instance of op builder, specified by class_name
     def create_op_builder(self, op_name):
         builder_class = self.get_op_builder(op_name)
-        if builder_class != None:
+        if builder_class is not None:
             return builder_class()
         return None
 

diff --git a/accelerator/npu_accelerator.py b/accelerator/npu_accelerator.py
@@ -30,7 +30,7 @@ def is_synchronized_device(self):
 
     # Device APIs
     def device_name(self, device_index=None):
-        if device_index == None:
+        if device_index is None:
             return 'npu'
         return 'npu:{}'.format(device_index)