diff --git a/easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb new file mode 100644 index 00000000000..90663d6e83b --- /dev/null +++ b/easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb @@ -0,0 +1,97 @@ +easyblock = 'PythonBundle' + +name = 'vLLM' +version = '0.4.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://github.com/vllm-project/vllm' +description = """A high-throughput and memory-efficient inference and serving engine for LLMs.""" + +toolchain = {'name': 'foss', 'version': '2023a'} + +builddependencies = [ + ('hatchling', '1.18.0'), + ('maturin', '1.1.0'), + ('CMake', '3.26.3'), + ('Ninja', '1.11.1'), +] +dependencies = [ + ('CUDA', '12.1.1', '', SYSTEM), + ('Python', '3.11.3'), + ('Python-bundle-PyPI', '2023.06'), + ('SciPy-bundle', '2023.07'), + ('SentencePiece', '0.2.0'), + ('py-cpuinfo', '9.0.0'), + ('Transformers', '4.39.3'), + ('pydantic', '2.5.3'), + ('tiktoken', '0.6.0'), + ('PyTorch-bundle', '2.1.2', versionsuffix), + ('Triton', '2.1.0', versionsuffix), + ('Ray-project', '2.9.1'), + ('numba', '0.58.1'), + ('xformers', '0.0.23.post1', versionsuffix), +] + +use_pip = True + +exts_list = [ + ('sniffio', '1.3.0', { + 'checksums': ['e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101'], + }), + ('anyio', '4.3.0', { + 'checksums': ['f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6'], + }), + ('starlette', '0.36.3', { + 'checksums': ['90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080'], + }), + ('fastapi', '0.110.0', { + 'checksums': ['266775f0dcc95af9d3ef39bad55cff525329a931d5fd51930aadd4f428bf7ff3'], + }), + ('h11', '0.14.0', { + 'checksums': ['8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d'], + }), + ('uvicorn', '0.30.0', { + 'checksums': ['f678dec4fa3a39706bbf49b9ec5fc40049d42418716cea52b53f07828a60aa37'], + }), + ('prometheus_client', '0.21.0', { + 'checksums': ['96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e'], + }), + ('pynvml', '11.5.0', { + 'checksums': ['d027b21b95b1088b9fc278117f9f61b7c67f8e33a787e9f83f735f0f71ac32d0'], + }), + ('diskcache', '5.6.3', { + 'checksums': ['2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc'], + }), + ('interegular', '0.3.3', { + 'checksums': ['d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600'], + }), + ('lark', '1.2.2', { + 'checksums': ['ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80'], + }), + ('nest_asyncio', '1.6.0', { + 'checksums': ['6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe'], + }), + ('rpds_py', '0.18.0', { + 'modulename': 'rpds', + 'checksums': ['42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d'], + }), + ('referencing', '0.35.1', { + 'checksums': ['25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c'], + }), + ('outlines', '0.0.34', { + 'checksums': ['594e7204c770b47a62eb5c2ba7d25ea0ab2e16882b5f04556712a0228d3d3309'], + }), + (name, version, { + # delete cmake and ninja deps to let pip check pass + 'preinstallopts': "sed -i -e '/cmake>=3.21/d' -e '/ninja/d' requirements.txt && ", + 'source_urls': ['https://github.com/vllm-project/vllm/archive/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'checksums': ['95e5fa8f6ac63f11b4c7122e01c0892beae91f754a64a7e525bc06c3b49feac7'], + }), +] + +sanity_pip_check = True + +sanity_check_commands = ["python -c 'from vllm import LLM, SamplingParams'"] + +moduleclass = 'ai' diff --git a/easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb new file mode 100644 index 00000000000..46ddc7138f7 --- /dev/null +++ b/easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb @@ -0,0 +1,40 @@ +easyblock = 'PythonPackage' + +name = 'xformers' +version = '0.0.23.post1' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://github.com/facebookresearch/xformers' +description = """Flexible Transformers, defined by interoperable and optimized building blocks.""" + +toolchain = {'name': 'foss', 'version': '2023a'} + +sources = [SOURCE_TAR_GZ] +checksums = ['b443b158bd7b5275b485d2c6aee94ebc2152878fd784e379b1c8bcb1d67f3b81'] + +builddependencies = [ + ('CMake', '3.26.3'), + ('Ninja', '1.11.1'), +] +dependencies = [ + ('CUDA', '12.1.1', '', SYSTEM), + ('Python', '3.11.3'), + ('Python-bundle-PyPI', '2023.06'), + ('SciPy-bundle', '2023.07'), + ('PyTorch-bundle', '2.1.2', versionsuffix), + ('flash-attention', '2.6.3', versionsuffix), + ('CUTLASS', '3.4.0', versionsuffix), + ('Triton', '2.1.0', versionsuffix), +] + +use_pip = True +download_dep_fail = True + +preinstallopts = 'export XFORMERS_MORE_DETAILS=1 && ' +preinstallopts += 'export XFORMERS_DISABLE_FLASH_ATTN=1 && ' +preinstallopts += 'export TORCH_CUDA_ARCH_LIST="5.2;6.0;7.0;7.5;8.0;8.6;9.0" && ' +preinstallopts += 'export MAX_JOBS=3 && ' + +sanity_pip_check = True + +moduleclass = 'ai' diff --git a/test/easyconfigs/easyconfigs.py b/test/easyconfigs/easyconfigs.py index 4b1c4dd770e..916196f9440 100644 --- a/test/easyconfigs/easyconfigs.py +++ b/test/easyconfigs/easyconfigs.py @@ -658,6 +658,8 @@ def check_dep_vars(self, gen, dep, dep_vars): # tensorflow-probability version to TF version ('2.8.4;', ['tensorflow-probability-0.16.0-']), ], + # vLLM has pinned dependency tiktoken == 0.6.0 + 'tiktoken': [('0.6.0;', ['vLLM-0.4.0-'])], # smooth-topk uses a newer version of torchvision 'torchvision': [('0.11.3;', ['smooth-topk-1.0-20210817-'])], # for the sake of backwards compatibility, keep UCX-CUDA v1.11.0 which depends on UCX v1.11.0