Merge pull request #21901 from pavelToman/20241122130705_new_pr_vLLM040
{ai}[foss/2023a] vLLM v0.4.0, xformers v0.0.23.post1 w/ CUDA 12.1.1
laraPPr authored Dec 20, 2024
2 parents 59224a8 + 28410b6 commit 884b086
Showing 3 changed files with 139 additions and 0 deletions.
97 changes: 97 additions & 0 deletions easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,97 @@
easyblock = 'PythonBundle'

name = 'vLLM'
version = '0.4.0'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/vllm-project/vllm'
description = """A high-throughput and memory-efficient inference and serving engine for LLMs."""

toolchain = {'name': 'foss', 'version': '2023a'}

builddependencies = [
('hatchling', '1.18.0'),
('maturin', '1.1.0'),
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('SentencePiece', '0.2.0'),
('py-cpuinfo', '9.0.0'),
('Transformers', '4.39.3'),
('pydantic', '2.5.3'),
('tiktoken', '0.6.0'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('Triton', '2.1.0', versionsuffix),
('Ray-project', '2.9.1'),
('numba', '0.58.1'),
('xformers', '0.0.23.post1', versionsuffix),
]

use_pip = True

exts_list = [
('sniffio', '1.3.0', {
'checksums': ['e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101'],
}),
('anyio', '4.3.0', {
'checksums': ['f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6'],
}),
('starlette', '0.36.3', {
'checksums': ['90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080'],
}),
('fastapi', '0.110.0', {
'checksums': ['266775f0dcc95af9d3ef39bad55cff525329a931d5fd51930aadd4f428bf7ff3'],
}),
('h11', '0.14.0', {
'checksums': ['8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d'],
}),
('uvicorn', '0.30.0', {
'checksums': ['f678dec4fa3a39706bbf49b9ec5fc40049d42418716cea52b53f07828a60aa37'],
}),
('prometheus_client', '0.21.0', {
'checksums': ['96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e'],
}),
('pynvml', '11.5.0', {
'checksums': ['d027b21b95b1088b9fc278117f9f61b7c67f8e33a787e9f83f735f0f71ac32d0'],
}),
('diskcache', '5.6.3', {
'checksums': ['2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc'],
}),
('interegular', '0.3.3', {
'checksums': ['d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600'],
}),
('lark', '1.2.2', {
'checksums': ['ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80'],
}),
('nest_asyncio', '1.6.0', {
'checksums': ['6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe'],
}),
('rpds_py', '0.18.0', {
'modulename': 'rpds',
'checksums': ['42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d'],
}),
('referencing', '0.35.1', {
'checksums': ['25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c'],
}),
('outlines', '0.0.34', {
'checksums': ['594e7204c770b47a62eb5c2ba7d25ea0ab2e16882b5f04556712a0228d3d3309'],
}),
(name, version, {
# drop cmake and ninja from the requirements so the pip check passes; both are already provided as build dependencies
'preinstallopts': "sed -i -e '/cmake>=3.21/d' -e '/ninja/d' requirements.txt && ",
'source_urls': ['https://github.com/vllm-project/vllm/archive/'],
'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
'checksums': ['95e5fa8f6ac63f11b4c7122e01c0892beae91f754a64a7e525bc06c3b49feac7'],
}),
]

sanity_pip_check = True

sanity_check_commands = ["python -c 'from vllm import LLM, SamplingParams'"]

moduleclass = 'ai'
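
For context: the sanity check above only verifies that vLLM's two main entry points import cleanly. A minimal usage sketch of what that import enables once the module is loaded (the model name is illustrative, not part of the easyconfig):

from vllm import LLM, SamplingParams

# load any Hugging Face-style model; opt-125m is just a small example
llm = LLM(model="facebook/opt-125m")
params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=32)
# generate() takes a list of prompts and returns one RequestOutput per prompt
for output in llm.generate(["Hello, my name is"], params):
    print(output.outputs[0].text)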
40 changes: 40 additions & 0 deletions easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,40 @@
easyblock = 'PythonPackage'

name = 'xformers'
version = '0.0.23.post1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/facebookresearch/xformers'
description = """Flexible Transformers, defined by interoperable and optimized building blocks."""

toolchain = {'name': 'foss', 'version': '2023a'}

sources = [SOURCE_TAR_GZ]
checksums = ['b443b158bd7b5275b485d2c6aee94ebc2152878fd784e379b1c8bcb1d67f3b81']

builddependencies = [
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('flash-attention', '2.6.3', versionsuffix),
('CUTLASS', '3.4.0', versionsuffix),
('Triton', '2.1.0', versionsuffix),
]

use_pip = True
download_dep_fail = True

preinstallopts = 'export XFORMERS_MORE_DETAILS=1 && '  # more verbose build/import diagnostics
preinstallopts += 'export XFORMERS_DISABLE_FLASH_ATTN=1 && '  # skip building the bundled flash-attention; the flash-attention dependency provides it
preinstallopts += 'export TORCH_CUDA_ARCH_LIST="5.2;6.0;7.0;7.5;8.0;8.6;9.0" && '  # CUDA compute capabilities to generate kernels for
preinstallopts += 'export MAX_JOBS=3 && '  # cap parallel compile jobs to limit memory usage

sanity_pip_check = True

moduleclass = 'ai'
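
A possible follow-up, not part of this PR: xformers ships an info module that reports which backends (flash-attention, Triton, CUTLASS) the build actually enabled, so it could be added as an extra sanity check:

sanity_check_commands = ["python -m xformers.info"]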
2 changes: 2 additions & 0 deletions test/easyconfigs/easyconfigs.py
@@ -658,6 +658,8 @@ def check_dep_vars(self, gen, dep, dep_vars):
# tensorflow-probability version to TF version
('2.8.4;', ['tensorflow-probability-0.16.0-']),
],
# vLLM has pinned dependency tiktoken == 0.6.0
'tiktoken': [('0.6.0;', ['vLLM-0.4.0-'])],
# smooth-topk uses a newer version of torchvision
'torchvision': [('0.11.3;', ['smooth-topk-1.0-20210817-'])],
# for the sake of backwards compatibility, keep UCX-CUDA v1.11.0 which depends on UCX v1.11.0
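
Side note on the new tiktoken entry: check_dep_vars flags dependencies that occur with more than one version, and each whitelist entry maps a version prefix to the easyconfig name prefixes allowed to use it, mirroring the tensorflow-probability and torchvision entries above. The pin itself comes from vLLM's own requirements.txt, along the lines of (exact upstream comment may differ):

tiktoken == 0.6.0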
