Merge pull request #21901 from pavelToman/20241122130705_new_pr_vLLM040
{ai}[foss/2023a] vLLM v0.4.0, xformers v0.0.23.post1 w/ CUDA 12.1.1
laraPPr authored Dec 20, 2024
2 parents 59224a8 + 28410b6 commit 884b086
Showing 3 changed files with 139 additions and 0 deletions.
97 changes: 97 additions & 0 deletions easybuild/easyconfigs/v/vLLM/vLLM-0.4.0-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,97 @@
easyblock = 'PythonBundle'

name = 'vLLM'
version = '0.4.0'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/vllm-project/vllm'
description = """A high-throughput and memory-efficient inference and serving engine for LLMs."""

toolchain = {'name': 'foss', 'version': '2023a'}

builddependencies = [
('hatchling', '1.18.0'),
('maturin', '1.1.0'),
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('SentencePiece', '0.2.0'),
('py-cpuinfo', '9.0.0'),
('Transformers', '4.39.3'),
('pydantic', '2.5.3'),
('tiktoken', '0.6.0'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('Triton', '2.1.0', versionsuffix),
('Ray-project', '2.9.1'),
('numba', '0.58.1'),
('xformers', '0.0.23.post1', versionsuffix),
]

use_pip = True

exts_list = [
('sniffio', '1.3.0', {
'checksums': ['e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101'],
}),
('anyio', '4.3.0', {
'checksums': ['f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6'],
}),
('starlette', '0.36.3', {
'checksums': ['90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080'],
}),
('fastapi', '0.110.0', {
'checksums': ['266775f0dcc95af9d3ef39bad55cff525329a931d5fd51930aadd4f428bf7ff3'],
}),
('h11', '0.14.0', {
'checksums': ['8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d'],
}),
('uvicorn', '0.30.0', {
'checksums': ['f678dec4fa3a39706bbf49b9ec5fc40049d42418716cea52b53f07828a60aa37'],
}),
('prometheus_client', '0.21.0', {
'checksums': ['96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e'],
}),
('pynvml', '11.5.0', {
'checksums': ['d027b21b95b1088b9fc278117f9f61b7c67f8e33a787e9f83f735f0f71ac32d0'],
}),
('diskcache', '5.6.3', {
'checksums': ['2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc'],
}),
('interegular', '0.3.3', {
'checksums': ['d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600'],
}),
('lark', '1.2.2', {
'checksums': ['ca807d0162cd16cef15a8feecb862d7319e7a09bdb13aef927968e45040fed80'],
}),
('nest_asyncio', '1.6.0', {
'checksums': ['6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe'],
}),
('rpds_py', '0.18.0', {
'modulename': 'rpds',
'checksums': ['42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d'],
}),
('referencing', '0.35.1', {
'checksums': ['25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c'],
}),
('outlines', '0.0.34', {
'checksums': ['594e7204c770b47a62eb5c2ba7d25ea0ab2e16882b5f04556712a0228d3d3309'],
}),
(name, version, {
# drop cmake and ninja from the requirements so the pip check passes; both are already provided as build dependencies
'preinstallopts': "sed -i -e '/cmake>=3.21/d' -e '/ninja/d' requirements.txt && ",
'source_urls': ['https://github.com/vllm-project/vllm/archive/'],
'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
'checksums': ['95e5fa8f6ac63f11b4c7122e01c0892beae91f754a64a7e525bc06c3b49feac7'],
}),
]

sanity_pip_check = True

sanity_check_commands = ["python -c 'from vllm import LLM, SamplingParams'"]

moduleclass = 'ai'
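
For context: the sanity check above only verifies that vLLM's two main entry points import cleanly. A minimal usage sketch of what that import enables once the module is loaded (the model name is illustrative, not part of the easyconfig):

from vllm import LLM, SamplingParams

# load any Hugging Face-style model; opt-125m is just a small example
llm = LLM(model="facebook/opt-125m")
params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=32)
# generate() takes a list of prompts and returns one RequestOutput per prompt
for output in llm.generate(["Hello, my name is"], params):
    print(output.outputs[0].text)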
40 changes: 40 additions & 0 deletions easybuild/easyconfigs/x/xformers/xformers-0.0.23.post1-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,40 @@
easyblock = 'PythonPackage'

name = 'xformers'
version = '0.0.23.post1'
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/facebookresearch/xformers'
description = """Flexible Transformers, defined by interoperable and optimized building blocks."""

toolchain = {'name': 'foss', 'version': '2023a'}

sources = [SOURCE_TAR_GZ]
checksums = ['b443b158bd7b5275b485d2c6aee94ebc2152878fd784e379b1c8bcb1d67f3b81']

builddependencies = [
('CMake', '3.26.3'),
('Ninja', '1.11.1'),
]
dependencies = [
('CUDA', '12.1.1', '', SYSTEM),
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'),
('SciPy-bundle', '2023.07'),
('PyTorch-bundle', '2.1.2', versionsuffix),
('flash-attention', '2.6.3', versionsuffix),
('CUTLASS', '3.4.0', versionsuffix),
('Triton', '2.1.0', versionsuffix),
]

use_pip = True
download_dep_fail = True

preinstallopts = 'export XFORMERS_MORE_DETAILS=1 && '  # more verbose build/import diagnostics
preinstallopts += 'export XFORMERS_DISABLE_FLASH_ATTN=1 && '  # skip building the bundled flash-attention; the flash-attention dependency provides it
preinstallopts += 'export TORCH_CUDA_ARCH_LIST="5.2;6.0;7.0;7.5;8.0;8.6;9.0" && '  # CUDA compute capabilities to generate kernels for
preinstallopts += 'export MAX_JOBS=3 && '  # cap parallel compile jobs to limit memory usage

sanity_pip_check = True

moduleclass = 'ai'
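
A possible follow-up, not part of this PR: xformers ships an info module that reports which backends (flash-attention, Triton, CUTLASS) the build actually enabled, so it could be added as an extra sanity check:

sanity_check_commands = ["python -m xformers.info"]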
2 changes: 2 additions & 0 deletions test/easyconfigs/easyconfigs.py
@@ -658,6 +658,8 @@ def check_dep_vars(self, gen, dep, dep_vars):
# tensorflow-probability version to TF version
('2.8.4;', ['tensorflow-probability-0.16.0-']),
],
# vLLM has pinned dependency tiktoken == 0.6.0
'tiktoken': [('0.6.0;', ['vLLM-0.4.0-'])],
# smooth-topk uses a newer version of torchvision
'torchvision': [('0.11.3;', ['smooth-topk-1.0-20210817-'])],
# for the sake of backwards compatibility, keep UCX-CUDA v1.11.0 which depends on UCX v1.11.0
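
Side note on the new tiktoken entry: check_dep_vars flags dependencies that occur with more than one version, and each whitelist entry maps a version prefix to the easyconfig name prefixes allowed to use it, mirroring the tensorflow-probability and torchvision entries above. The pin itself comes from vLLM's own requirements.txt, along the lines of (exact upstream comment may differ):

tiktoken == 0.6.0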
