From 15076fabb90a76188ec77d1162e63f866d8c6c75 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 12 Apr 2024 00:50:25 +0100 Subject: [PATCH] Compile kernels and fix build (#17) These Dockerfile changes: - Update the release stage to work with the recently refactored `requirements-common.txt` / `requirements-cuda.txt` split - Fix up the kernel compilation in the `build` stage to correctly pick up CUDA - Install the kernels from this Docker build rather than pulling a precompiled wheel. We can swap that back once a new wheel is available with the correct PyTorch version and updated interfaces --------- Signed-off-by: Nick Hill Signed-off-by: Joe Runde Co-authored-by: Joe Runde --- Dockerfile.ubi | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 14bc0bc6c3672..d80691e563f59 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -175,6 +175,10 @@ ENV NVCC_THREADS=$nvcc_threads # make sure punica kernels are built (for LoRA) ENV VLLM_INSTALL_PUNICA_KERNELS=1 +# Setup path stuff? 
Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8 +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + RUN python3 setup.py build_ext --inplace @@ -257,7 +261,7 @@ FROM base AS vllm WORKDIR /vllm-staging # COPY files from various places into a staging directory COPY --link vllm vllm -COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/ +COPY --from=build --link /workspace/vllm/*.so vllm/ COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb # custom COPY command to use umask to control permissions and grant permissions @@ -281,9 +285,10 @@ COPY --from=python-torch-base --link /opt/vllm /opt/vllm ENV PATH=/opt/vllm/bin/:$PATH RUN --mount=type=cache,target=/root/.cache/pip \ - --mount=type=bind,source=requirements.txt,target=requirements.txt \ + --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ + --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ pip3 install \ - -r requirements.txt \ + -r requirements-cuda.txt \ # additional dependencies for the TGIS gRPC server grpcio-tools==1.62.1 \ # additional dependencies for openai api_server