{ai}[foss/2023a] vLLM v0.4.0, xformers v0.0.23.post1 w/ CUDA 12.1.1 #21901

Open · wants to merge 2 commits into develop
97 changes: 97 additions & 0 deletions easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,97 @@
easyblock = 'PythonBundle'

name = 'vLLM'
version = '0.4.0'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/vllm-project/vllm'
description = """A high-throughput and memory-efficient inference and serving engine for LLMs."""

toolchain = {'name': 'foss', 'version': '2023a'}

builddependencies = [
('hatchling', '1.18.0'),
('maturin', '1.1.0'),
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('SentencePiece', '0.2.0'),
('py-cpuinfo', '9.0.0'),
('Transformers', '4.39.3'),
('pydantic', '2.5.3'),
('tiktoken', '0.6.0'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('Triton', '2.1.0', versionsuffix),
('Ray-project', '2.9.1'),
('numba', '0.58.1'),
('xformers', '0.0.23.post1', versionsuffix),
]

use_pip = True

exts_list = [
('sniffio', '1.3.0', {
'checksums': ['e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101'],
}),
('anyio', '4.3.0', {
'checksums': ['f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6'],
}),
('starlette', '0.36.3', {
'checksums': ['90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080'],
}),
('fastapi', '0.110.0', {
'checksums': ['266775f0dcc95af9d3ef39bad55cff525329a931d5fd51930aadd4f428bf7ff3'],
}),
('h11', '0.14.0', {
'checksums': ['8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d'],
}),
('uvicorn', '0.30.0', {
'checksums': ['f678dec4fa3a39706bbf49b9ec5fc40049d42418716cea52b53f07828a60aa37'],
}),
('prometheus_client', '0.21.0', {
'checksums': ['96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e'],
}),
('pynvml', '11.5.0', {
'checksums': ['d027b21b95b1088b9fc278117f9f61b7c67f8e33a787e9f83f735f0f71ac32d0'],
}),
('diskcache', '5.6.3', {
'checksums': ['2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc'],
}),
('interegular', '0.3.3', {
'checksums': ['d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600'],
}),
('lark', '1.2.2', {
'checksums': ['ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80'],
}),
('nest_asyncio', '1.6.0', {
'checksums': ['6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe'],
}),
('rpds_py', '0.18.0', {
'modulename': 'rpds',
'checksums': ['42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d'],
}),
('referencing', '0.35.1', {
'checksums': ['25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c'],
}),
('outlines', '0.0.34', {
'checksums': ['594e7204c770b47a62eb5c2ba7d25ea0ab2e16882b5f04556712a0228d3d3309'],
}),
(name, version, {
# strip the cmake and ninja requirements so the pip check passes (both are provided as build dependencies)
'preinstallopts': "sed -i -e '/cmake>=3.21/d' -e '/ninja/d' requirements.txt && ",
'source_urls': ['https://github.com/vllm-project/vllm/archive/'],
'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
'checksums': ['95e5fa8f6ac63f11b4c7122e01c0892beae91f754a64a7e525bc06c3b49feac7'],
}),
]

sanity_pip_check = True

sanity_check_commands = ["python -c 'from vllm import LLM, SamplingParams'"]

moduleclass = 'ai'
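
The sanity check only imports the two public entry points. A fuller smoke test of the installed module could look like the sketch below (model name, prompt, and sampling settings are illustrative; a GPU node with network access to fetch the checkpoint is assumed):

# Minimal vLLM smoke test; facebook/opt-125m is an illustrative small model.
from vllm import LLM, SamplingParams

prompts = ["The capital of France is"]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=32)

llm = LLM(model="facebook/opt-125m")
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(output.prompt, "->", output.outputs[0].text)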
40 changes: 40 additions & 0 deletions easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,40 @@
easyblock = 'PythonPackage'

name = 'xformers'
version = '0.0.23.post1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/facebookresearch/xformers'
description = """Flexible Transformers, defined by interoperable and optimized building blocks."""

toolchain = {'name': 'foss', 'version': '2023a'}

sources = [SOURCE_TAR_GZ]
checksums = ['b443b158bd7b5275b485d2c6aee94ebc2152878fd784e379b1c8bcb1d67f3b81']

builddependencies = [
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('flash-attention', '2.6.3', versionsuffix),
('CUTLASS', '3.4.0', versionsuffix),
('Triton', '2.1.0', versionsuffix),
]

use_pip = True
download_dep_fail = True

preinstallopts = 'export XFORMERS_MORE_DETAILS=1 && '
preinstallopts += 'export XFORMERS_DISABLE_FLASH_ATTN=1 && '
preinstallopts += 'export TORCH_CUDA_ARCH_LIST="5.2;6.0;7.0;7.5;8.0;8.6;9.0" && '
preinstallopts += 'export MAX_JOBS=3 && '

sanity_pip_check = True

moduleclass = 'ai'
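
Since flash-attention is supplied as an external dependency and the CUDA kernels are built for the listed compute capabilities, a quick post-install check is to dump xformers' component report and exercise the memory-efficient attention op. A sketch, assuming a GPU node (tensor shapes are illustrative):

# Dump the xformers build/feature report (same as `python -m xformers.info`),
# then run one memory_efficient_attention call on a toy fp16 input.
import subprocess, sys
import torch
from xformers.ops import memory_efficient_attention

subprocess.run([sys.executable, "-m", "xformers.info"], check=True)

q = torch.randn(1, 8, 16, 64, device="cuda", dtype=torch.float16)  # (batch, seq, heads, head_dim)
out = memory_efficient_attention(q, q, q)
print(out.shape)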
2 changes: 2 additions & 0 deletions test/easyconfigs/easyconfigs.py
@@ -658,6 +658,8 @@ def check_dep_vars(self, gen, dep, dep_vars):
# tensorflow-probability version to TF version
('2.8.4;', ['tensorflow-probability-0.16.0-']),
],
# vLLM has a pinned dependency on tiktoken == 0.6.0
'tiktoken': [('0.6.0;', ['vLLM-0.4.0-'])],
# smooth-topk uses a newer version of torchvision
'torchvision': [('0.11.3;', ['smooth-topk-1.0-20210817-'])],
# for the sake of backwards compatibility, keep UCX-CUDA v1.11.0 which depends on UCX v1.11.0
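
For context, each whitelist entry maps a dependency name to (version prefix, easyconfig filename prefixes) pairs: an extra version of a dependency is tolerated only if every easyconfig that uses it matches one of the listed prefixes. A simplified sketch of those semantics (illustrative names, not the actual test code):

# Illustrative sketch of the dep_vars whitelist check, not the real test code.
whitelist = {'tiktoken': [('0.6.0;', ['vLLM-0.4.0-'])]}

def extra_version_allowed(dep, version_key, easyconfigs):
    for ver_prefix, ec_prefixes in whitelist.get(dep, []):
        if version_key.startswith(ver_prefix):
            # every easyconfig using this version must match a whitelisted prefix
            return all(ec.startswith(tuple(ec_prefixes)) for ec in easyconfigs)
    return False

print(extra_version_allowed('tiktoken', '0.6.0; versionsuffix: ',
                            ['vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb']))  # True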