From 312ec7bd0347cf18c55aefdfbfaef398c95dbde2 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Fri, 10 May 2024 11:44:38 -0600 Subject: [PATCH 1/3] TEMP: no shared tokenizer from PR-3512 Gotta get conflicts with the main line resolved first. Just don't use the shared tokenizer for now. Signed-off-by: Travis Johnson --- vllm/entrypoints/grpc/grpc_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/grpc/grpc_server.py b/vllm/entrypoints/grpc/grpc_server.py index f4450e175dc24..9aead23aaee06 100644 --- a/vllm/entrypoints/grpc/grpc_server.py +++ b/vllm/entrypoints/grpc/grpc_server.py @@ -118,7 +118,8 @@ def __init__(self, engine: AsyncLLMEngine, args: argparse.Namespace): async def _post_init(self): self.config = await self.engine.get_model_config() - self.tokenizer_group = await self.engine.get_tokenizer_group() + # self.tokenizer_group = await self.engine.get_tokenizer_group() + self.tokenizer_group = self.engine.engine.tokenizer self.tokenizer = await self.engine.get_tokenizer() # Swap in the special TGIS stats logger From bd23984499fb111a8b2f50317da122c335572a29 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Mon, 29 Apr 2024 15:45:15 -0700 Subject: [PATCH 2/3] [Core] Make Ray an optional "extras" requirement Still included in built docker images --- Dockerfile | 2 +- requirements-cuda.txt | 1 - requirements-rocm.txt | 3 +-- setup.py | 20 ++++++++++++++++---- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/Dockerfile b/Dockerfile index ddca95c0e8786..27dbe6ff88122 100644 --- a/Dockerfile +++ b/Dockerfile @@ -104,7 +104,7 @@ RUN ldconfig /usr/local/cuda-12.4/compat/ # install vllm wheel first, so that torch etc will be installed RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ --mount=type=cache,target=/root/.cache/pip \ - pip install dist/*.whl --verbose + pip install "$(echo dist/*.whl)[ray]" --verbose #################### vLLM installation IMAGE 
#################### diff --git a/requirements-cuda.txt b/requirements-cuda.txt index acb0164007dba..99891e5b64ab3 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -2,7 +2,6 @@ -r requirements-common.txt # Dependencies for NVIDIA GPUs -ray >= 2.9 nvidia-ml-py # for pynvml package vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library torch == 2.3.0 diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 903845b64d98f..80f4f4431d830 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1,5 +1,4 @@ # Common dependencies -r requirements-common.txt -# Dependencies for AMD GPUs -ray == 2.9.3 +# No specific dependencies currently for AMD GPUs diff --git a/setup.py b/setup.py index a66af2c5d556f..cae4468cae67d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ import subprocess import sys from shutil import which -from typing import Dict, List +from typing import Dict, List, Optional import torch from packaging.version import Version, parse @@ -380,6 +380,20 @@ def _read_requirements(filename: str) -> List[str]: return requirements +def get_extra_requirements() -> Optional[Dict[str, List[str]]]: + extras = {"tensorizer": ["tensorizer>=2.9.0"]} + if _is_cuda(): + extras["ray"] = ["ray>=2.9"] + elif _is_hip(): + extras["ray"] = ["ray==2.9.3"] + elif _is_neuron() or _is_cpu(): + pass + else: + raise ValueError( + "Unsupported platform, please use CUDA, ROCM or Neuron.") + return extras + + ext_modules = [] if _is_cuda(): @@ -425,9 +439,7 @@ def _read_requirements(filename: str) -> List[str]: python_requires=">=3.8", install_requires=get_requirements(), ext_modules=ext_modules, - extras_require={ - "tensorizer": ["tensorizer>=2.9.0"], - }, + extras_require=get_extra_requirements(), cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {}, package_data=package_data, ) From 3be261cf3e4bfe3eee1650ca6d59e50d52d3d169 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Tue, 21 May 2024 12:16:19 +0200 
Subject: [PATCH 3/3] Dockerfile.ubi: remove leftover flash-attn references --- Dockerfile.ubi | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index d4fbd52d1c8ce..16ad4e6018f39 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -240,10 +240,6 @@ RUN pip install \ mv /root/.config/vllm/nccl/cu12/libnccl.so.2.18.1 /opt/vllm/lib/ && \ chmod 0755 /opt/vllm/lib/libnccl.so.2.18.1 -# Install flash attention (from pre-built wheel) -RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \ - pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir - RUN --mount=type=cache,target=/root/.cache/pip \ pip install \ # additional dependencies for the TGIS gRPC server