Commit a7e26d1: Select tests using model flags

DarkLight1337 committed Nov 13, 2024 (1 parent: f526b55)
Signed-off-by: DarkLight1337 <[email protected]>
Showing 10 changed files with 107 additions and 74 deletions.
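In short, this commit switches CI test selection from hard-coded per-file pytest invocations to pytest marker expressions: core_model and quant_model gate the per-PR (Standard) suites, while cpu_model gates what the CPU-only runners execute. Custom markers like these have to be registered somewhere in the project's pytest configuration; below is a minimal sketch of one way to do that (the conftest.py placement is an assumption for illustration, not necessarily where vLLM declares them):

```python
# conftest.py -- illustrative sketch; vLLM is assumed to register these
# markers in its own pytest configuration, not necessarily here.


def pytest_configure(config):
    # Registered markers document what `pytest -m <expr>` can select
    # against and silence PytestUnknownMarkWarning.
    config.addinivalue_line(
        "markers", "core_model: part of the standard (per-PR) model suite")
    config.addinivalue_line(
        "markers", "quant_model: quantized counterpart of a core model")
    config.addinivalue_line(
        "markers", "cpu_model: safe to run on CPU-only test machines")
```

Marked tests (or individual parametrizations) are then selected on the command line, e.g. `pytest -m cpu_model` or `pytest -m 'not core_model and not quant_model'`, which is exactly what the changed files below do.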
6 changes: 3 additions & 3 deletions .buildkite/run-cpu-test-ppc64le.sh
@@ -27,9 +27,9 @@ function cpu_tests() {
      decord einops librosa peft Pillow sentence-transformers soundfile \
      transformers_stream_generator matplotlib datamodel_code_generator
  pip install torchvision --index-url https://download.pytorch.org/whl/cpu
- pytest -v -s tests/models/embedding/language
- pytest -v -s tests/models/encoder_decoder/language
- pytest -v -s tests/models/decoder_only/language/test_models.py
+ pytest -v -s tests/models/decoder_only/language -m cpu_model
+ pytest -v -s tests/models/embedding/language -m cpu_model
+ pytest -v -s tests/models/encoder_decoder/language -m cpu_model
  pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
  pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"

6 changes: 3 additions & 3 deletions .buildkite/run-cpu-test.sh
@@ -38,9 +38,9 @@ function cpu_tests() {
      decord einops librosa peft Pillow sentence-transformers soundfile \
      transformers_stream_generator matplotlib datamodel_code_generator
  pip install torchvision --index-url https://download.pytorch.org/whl/cpu
- pytest -v -s tests/models/embedding/language
- pytest -v -s tests/models/encoder_decoder/language
- pytest -v -s tests/models/decoder_only/language/test_models.py
+ pytest -v -s tests/models/decoder_only/language -m cpu_model
+ pytest -v -s tests/models/embedding/language -m cpu_model
+ pytest -v -s tests/models/encoder_decoder/language -m cpu_model
  pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
  pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"

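Both CPU scripts now apply the same `-m cpu_model` filter across whole test directories instead of naming individual files. The same selection can be reproduced outside Buildkite; here is a convenience sketch using pytest's Python entry point (assuming the vLLM repository root as the working directory):

```python
# run_cpu_models.py -- local convenience sketch, not part of the commit.
import sys

import pytest

# Directories mirror the `pytest -v -s ... -m cpu_model` lines above.
CPU_TEST_DIRS = [
    "tests/models/decoder_only/language",
    "tests/models/embedding/language",
    "tests/models/encoder_decoder/language",
    "tests/models/decoder_only/audio_language",
    "tests/models/decoder_only/vision_language",
]

if __name__ == "__main__":
    # -m cpu_model keeps only tests (or params) carrying that marker.
    sys.exit(pytest.main(["-v", "-s", *CPU_TEST_DIRS, "-m", "cpu_model"]))
```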
38 changes: 20 additions & 18 deletions .buildkite/test-pipeline.yaml
@@ -322,62 +322,64 @@ steps:
- pytest -v -s models/test_oot_registration.py # it needs a clean process
- pytest -v -s models/*.py --ignore=models/test_oot_registration.py

- - label: Decoder-only Language Models Test (Standard) # 18min
+ - label: Language Models Test (Standard) # 25min
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
+ - tests/models/embedding/language
+ - tests/models/encoder_decoder/language
commands:
- pytest -v -s models/decoder_only/language -m core_model
- pytest -v -s models/decoder_only/language -m quant_model
+ - pytest -v -s models/embedding/language -m core_model
+ - pytest -v -s models/embedding/vision_language -m core_model

- - label: Decoder-only Language Models Test (Extended) # 46min
+ - label: Language Models Test (Extended) # 50min
nightly: true
source_file_dependencies:
- vllm/
- tests/models/decoder_only/language
+ - tests/models/embedding/language
+ - tests/models/encoder_decoder/language
commands:
- pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
+ - pytest -v -s models/embedding/language -m 'not core_model'
+ - pytest -v -s models/embedding/vision_language -m 'not core_model'

- - label: Decoder-only Multi-Modal Models Test (Standard) # 22min
+ - label: Multi-Modal Models Test (Standard) # 25min
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- tests/models/decoder_only/audio_language
- tests/models/decoder_only/vision_language
+ - tests/models/embedding/vision_language
+ - tests/models/encoder_decoder/vision_language
commands:
- pytest -v -s models/decoder_only/audio_language -m core_model
- pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m core_model
# No tests under this group for now
# - pytest -v -s models/decoder_only/audio_language -m quant_model
- pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m quant_model
+ - pytest -v -s models/encoder_decoder/language -m core_model
+ - pytest -v -s models/encoder_decoder/vision_language -m core_model

- - label: Decoder-only Multi-Modal Models Test (Extended) # 1h10m
+ - label: Multi-Modal Models Test (Extended) # 1h10m
nightly: true
source_file_dependencies:
- vllm/
- tests/models/decoder_only/audio_language
- tests/models/decoder_only/vision_language
+ - tests/models/embedding/vision_language
+ - tests/models/encoder_decoder/vision_language
commands:
- pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
# HACK - run phi3v tests separately to sidestep this transformers bug
# https://github.com/huggingface/transformers/issues/34307
- pytest -v -s models/decoder_only/vision_language/test_phi3v.py
- pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model and not quant_model'

- label: Other Models Test # 20min
#mirror_hardwares: [amd]
source_file_dependencies:
- vllm/
- - tests/models/embedding/language
- - tests/models/embedding/vision_language
- tests/models/encoder_decoder/language
- tests/models/encoder_decoder/vision_language
commands:
- - pytest -v -s models/embedding/language
- - pytest -v -s models/embedding/vision_language
- - pytest -v -s models/encoder_decoder/language
- - pytest -v -s models/encoder_decoder/vision_language
+ - pytest -v -s models/encoder_decoder/language -m 'not core_model'
+ - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'

# This test is used only in PR development phase to test individual models and should never run on main
- label: Custom Models Test
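The Standard/Extended split in the pipeline works because the marker expressions partition each directory: `-m core_model`, `-m quant_model`, and `-m 'not core_model and not quant_model'` cover disjoint sets (as long as no test carries both marks). A self-contained sketch with hypothetical tests:

```python
# test_buckets.py -- hypothetical tests showing the three CI buckets.
import pytest


@pytest.mark.core_model
def test_core_path():
    """Collected by `-m core_model` (Standard job, every PR)."""


@pytest.mark.quant_model
def test_quant_path():
    """Collected by `-m quant_model` (Standard job, every PR)."""


def test_long_tail():
    """Collected by `-m 'not core_model and not quant_model'` (Extended, nightly)."""
```

Running all three expressions over a directory visits each test exactly once, so the nightly Extended jobs skip precisely what the per-PR Standard jobs already covered.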
71 changes: 44 additions & 27 deletions tests/models/decoder_only/language/test_models.py
@@ -4,37 +4,54 @@
"""
import pytest

- from vllm.platforms import current_platform
-
from ...utils import check_logprobs_close

- MODELS = [
-     "facebook/opt-125m",  # opt
-     "openai-community/gpt2",  # gpt2
-     # "Milos/slovak-gpt-j-405M",  # gptj
-     # "bigcode/tiny_starcoder_py",  # gpt_bigcode
-     # "EleutherAI/pythia-70m",  # gpt_neox
-     "bigscience/bloom-560m",  # bloom - testing alibi slopes
-     "microsoft/phi-2",  # phi
-     # "stabilityai/stablelm-3b-4e1t",  # stablelm
-     # "bigcode/starcoder2-3b",  # starcoder2
-     "google/gemma-1.1-2b-it",  # gemma
-     "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
-     "meta-llama/Llama-3.2-1B-Instruct",  # llama
- ]
-
- if not current_platform.is_cpu():
-     MODELS += [
-         # fused_moe which not supported on CPU
-         "openbmb/MiniCPM3-4B",
-     ]
-
- target_dtype = "half"


- @pytest.mark.core_model
- @pytest.mark.parametrize("model", MODELS)
- @pytest.mark.parametrize("dtype", [target_dtype])
+ @pytest.mark.cpu_model
+ @pytest.mark.parametrize(
+     "model",
+     [
+         pytest.param(
+             "facebook/opt-125m",  # opt
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param(
+             "openai-community/gpt2",  # gpt2
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param("Milos/slovak-gpt-j-405M"),  # gptj
+         pytest.param("bigcode/tiny_starcoder_py"),  # gpt_bigcode
+         pytest.param("EleutherAI/pythia-70m"),  # gpt_neox
+         pytest.param(
+             "bigscience/bloom-560m",  # bloom - testing alibi slopes
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param(
+             "microsoft/phi-2",  # phi
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param("stabilityai/stablelm-3b-4e1t"),  # stablelm
+         pytest.param("bigcode/starcoder2-3b"),  # starcoder2
+         pytest.param(
+             "google/gemma-1.1-2b-it",  # gemma
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param(
+             "Qwen/Qwen2.5-0.5B-Instruct",  # qwen2
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param(
+             "meta-llama/Llama-3.2-1B-Instruct",  # llama
+             marks=[pytest.mark.core_model, pytest.mark.cpu_model],
+         ),
+         pytest.param(
+             "openbmb/MiniCPM3-4B",
+             # fused_moe not supported on CPU
+             marks=[pytest.mark.core_model],
+         ),
+     ])
+ @pytest.mark.parametrize("dtype", ["half"])
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(
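The rewrite above replaces a module-level MODELS list plus a runtime `current_platform.is_cpu()` guard with per-parameter marks, so deselection happens at collection time. A minimal runnable sketch of the pattern (toy model names, not vLLM code):

```python
# test_param_marks.py -- minimal sketch of per-parameter marks (toy values).
import pytest


@pytest.mark.parametrize(
    "model",
    [
        pytest.param("tiny-dense-model",
                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
        # No cpu_model mark: `-m cpu_model` never collects this param,
        # replacing the old `if not current_platform.is_cpu()` list guard.
        pytest.param("fused-moe-model", marks=[pytest.mark.core_model]),
    ],
)
def test_models(model: str) -> None:
    assert isinstance(model, str)
```

`pytest test_param_marks.py -m cpu_model` collects only the first case; with no `-m` filter, both run.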
10 changes: 7 additions & 3 deletions tests/models/embedding/language/test_cls_models.py
@@ -9,10 +9,14 @@
import torch
from transformers import AutoModelForSequenceClassification

- CLASSIFICATION_MODELS = ["jason9693/Qwen2.5-1.5B-apeach"]
-

- @pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
+ @pytest.mark.parametrize(
+     "model",
+     [
+         pytest.param("jason9693/Qwen2.5-1.5B-apeach",
+                      marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+     ],
+ )
@pytest.mark.parametrize("dtype", ["float"])
def test_classification_models(
hf_runner,
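A single pytest.param can carry several marks, and `-m` expressions combine them with and/or/not, so one parametrization can belong to the core suite, the CPU suite, or both, as this hypothetical example shows:

```python
# test_mark_combination.py -- hypothetical example, not vLLM code.
import pytest


@pytest.mark.parametrize(
    "model",
    [
        pytest.param("small-classifier",
                     marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
        pytest.param("gpu-only-classifier",
                     marks=[pytest.mark.core_model]),
    ],
)
def test_classify(model: str) -> None:
    assert model


# `pytest -m core_model` collects both params;
# `pytest -m 'core_model and cpu_model'` collects only "small-classifier".
```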
34 changes: 15 additions & 19 deletions tests/models/embedding/language/test_embedding.py
@@ -4,25 +4,24 @@
"""
import pytest

- from vllm.utils import current_platform
-
from ..utils import check_embeddings_close

- # Model, Guard
- MODELS = [
-     # "intfloat/e5-mistral-7b-instruct",
-     # "BAAI/bge-base-en-v1.5",
-     # "BAAI/bge-multilingual-gemma2",
-     "ssmits/Qwen2-7B-Instruct-embed-base",
-     "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
- ]
-
- ENCODER_ONLY = [
-     "BAAI/bge-base-en-v1.5",
- ]
-

- @pytest.mark.parametrize("model", MODELS)
+ @pytest.mark.parametrize(
+     "model",
+     [
+         # [Encoder-only]
+         pytest.param("BAAI/bge-base-en-v1.5",
+                      marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+         # [Decoder-only]
+         pytest.param("intfloat/e5-mistral-7b-instruct",
+                      marks=[pytest.mark.core_model]),
+         pytest.param("BAAI/bge-multilingual-gemma2",
+                      marks=[pytest.mark.core_model]),
+         pytest.param("ssmits/Qwen2-7B-Instruct-embed-base"),
+         pytest.param("Alibaba-NLP/gte-Qwen2-1.5B-instruct"),
+     ],
+ )
@pytest.mark.parametrize("dtype", ["half"])
def test_models(
hf_runner,
@@ -31,9 +30,6 @@ def test_models(
model,
dtype: str,
) -> None:
-     if model not in ENCODER_ONLY and current_platform.is_cpu():
-         pytest.skip("Skip large embedding models test on CPU.")
-
# The example_prompts has ending "\n", for example:
# "Write a short story about a robot that dreams for the first time.\n"
# sentence_transformers will strip the input texts, see:
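The deleted `pytest.skip` lines show the other half of the change: platform checks move out of test bodies and into marks. A hedged before/after sketch (the `is_cpu` helper is a stand-in for vLLM's `current_platform.is_cpu()`):

```python
# skip_vs_marker.py -- illustrative contrast, not vLLM code.
import pytest


def is_cpu() -> bool:
    """Stand-in for vllm.platforms.current_platform.is_cpu()."""
    return True


# Before: collected everywhere, then skipped at runtime on CPU runners.
def test_large_model_old_style():
    if is_cpu():
        pytest.skip("Skip large embedding models test on CPU.")


# After: CPU runners invoke pytest with `-m cpu_model`, so unmarked tests
# are never collected there in the first place.
@pytest.mark.cpu_model
def test_small_model_new_style():
    pass
```

Besides saving collection and setup time, marker-based deselection keeps skip noise out of CPU test reports.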
2 changes: 2 additions & 0 deletions tests/models/embedding/vision_language/test_llava_next.py
@@ -88,6 +88,7 @@ def _run_test(

@pytest.mark.skipif(transformers.__version__.startswith("4.46"),
reason="Model broken with changes in transformers 4.46")
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_models_text(
@@ -112,6 +113,7 @@ def test_models_text(


@large_gpu_test(min_gb=48)
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_models_image(
2 changes: 2 additions & 0 deletions tests/models/embedding/vision_language/test_phi3v.py
@@ -74,6 +74,7 @@ def _run_test(
)


+ @pytest.mark.core_model
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_models_text(
@@ -98,6 +99,7 @@ def test_models_text(


@large_gpu_test(min_gb=48)
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["half"])
def test_models_image(
9 changes: 8 additions & 1 deletion tests/models/encoder_decoder/language/test_bart.py
@@ -170,7 +170,14 @@ def run_test(
)


- @pytest.mark.parametrize("model", MODELS)
+ @pytest.mark.parametrize(
+     "model",
+     [
+         pytest.param("facebook/bart-base",
+                      marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+         pytest.param("facebook/bart-large-cnn"),
+     ],
+ )
@pytest.mark.parametrize("dtype", ["float", "bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
3 changes: 3 additions & 0 deletions tests/models/encoder_decoder/vision_language/test_mllama.py
@@ -233,6 +233,7 @@ def clear_cache():


@large_gpu_test(min_gb=48)
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize(
"sizes",
@@ -278,6 +279,7 @@ def test_models_single_leading_image(hf_runner, vllm_runner, image_assets,


@large_gpu_test(min_gb=48)
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [128])
@@ -326,6 +328,7 @@ def test_models_multi_leading_images(hf_runner, vllm_runner, image_assets,


@large_gpu_test(min_gb=48)
+ @pytest.mark.core_model
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [128])
