Skip to content

Commit

Permalink
add CPU autotp UT (#4263)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Yejing-Lai authored Sep 27, 2023
1 parent 28b9d5c commit 388c848
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/cpu-inference.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,5 @@ jobs:
source oneCCL/build/_install/env/setvars.sh
unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
cd tests
TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' -m 'inference_ops' -m 'inference' unit/
TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'seq_inference' unit/
TRANSFORMERS_CACHE=~/tmp/transformers_cache/ TORCH_EXTENSIONS_DIR=./torch-extensions pytest -m 'inference_ops' -m 'inference' unit/
4 changes: 4 additions & 0 deletions tests/unit/hybrid_engine/test_he_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from deepspeed.accelerator import get_accelerator

from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
from deepspeed.ops.op_builder import InferenceBuilder

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

rocm_version = OpBuilder.installed_rocm_version()
if rocm_version != (0, 0):
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/hybrid_engine/test_he_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from deepspeed.accelerator import get_accelerator

from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)
from deepspeed.ops.op_builder import InferenceBuilder

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

rocm_version = OpBuilder.installed_rocm_version()
if rocm_version != (0, 0):
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/hybrid_engine/test_he_lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
from deepspeed.utils import safe_get_full_grad
import numpy.testing as npt
from unit.common import DistributedTest
from deepspeed.ops.op_builder import InferenceBuilder

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM)

Expand Down
18 changes: 14 additions & 4 deletions tests/unit/inference/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@
from deepspeed.accelerator import get_accelerator
from deepspeed.ops.op_builder import InferenceBuilder

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

rocm_version = OpBuilder.installed_rocm_version()
if rocm_version != (0, 0):
pytest.skip("skip inference tests on rocm for now", allow_module_level=True)
Expand Down Expand Up @@ -365,6 +362,9 @@ def test(
if invalid_test_msg:
pytest.skip(invalid_test_msg)

if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)

model, task = model_w_task
local_rank = int(os.getenv("LOCAL_RANK", "0"))

Expand Down Expand Up @@ -401,6 +401,9 @@ def test(
):
model, task = model_w_task
dtype = torch.float16
if dtype not in get_accelerator().supported_dtypes():
pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")

local_rank = int(os.getenv("LOCAL_RANK", "0"))

pipe = pipeline(task, model=model, model_kwargs={"low_cpu_mem_usage": True}, device=local_rank, framework="pt")
Expand Down Expand Up @@ -514,7 +517,7 @@ def test(
[("Helsinki-NLP/opus-mt-en-de", "translation"), ("Salesforce/codegen-350M-mono", "text-generation")],
ids=["marian", "codegen"], #codegen has fusedqkv weight.
)
@pytest.mark.parametrize("dtype", [torch.float16], ids=["fp16"])
@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16], ids=["fp16", "bf16"])
class TestAutoTensorParallelism(DistributedTest):
world_size = [2]

Expand All @@ -530,6 +533,13 @@ def test(
if invalid_test_msg:
pytest.skip(invalid_test_msg)

if dtype not in get_accelerator().supported_dtypes():
pytest.skip(f"Acceleraor {get_accelerator().device_name()} does not support {dtype}.")

# TODO: enable this test after torch 2.1 stable release
if dtype == torch.bfloat16 and model_w_task[0] == "Salesforce/codegen-350M-mono":
pytest.skip("Codegen model(bf16) need to use torch version > 2.0.")

model, task = model_w_task
local_rank = int(os.getenv("LOCAL_RANK", "0"))
world_size = int(os.getenv("WORLD_SIZE", "2"))
Expand Down

0 comments on commit 388c848

Please sign in to comment.