Skip to content

Commit

Permalink
install adapter for 0.6.5
Browse files Browse the repository at this point in the history
Signed-off-by: Jefferson Fialho <[email protected]>
  • Loading branch information
yecohn authored and fialhocoelho committed Dec 19, 2024
1 parent e362236 commit 3f74522
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 160 deletions.
277 changes: 118 additions & 159 deletions Dockerfile.rocm.ubi
Original file line number Diff line number Diff line change
@@ -1,169 +1,131 @@
## Global Args ##################################################################
ARG BASE_UBI_IMAGE_TAG=9.4
## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.5-1733767867
ARG PYTHON_VERSION=3.12
# Default ROCm ARCHes to build vLLM for.
ARG PYTORCH_ROCM_ARCH="gfx908;gfx90a;gfx942;gfx1100"
ARG MAX_JOBS=12

FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf -y update && microdnf install -y \
python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
&& microdnf clean all

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
WORKDIR /workspace

RUN --mount=type=cache,target=/root/.cache/pip \
microdnf -y update && \
microdnf install -y --setopt=install_weak_deps=0 --nodocs \
python${PYTHON_VERSION}-devel \
python${PYTHON_VERSION}-pip \
python${PYTHON_VERSION}-wheel && \
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
pip install -U pip wheel setuptools uv && \
microdnf clean all


FROM base AS rocm_base
ARG ROCM_VERSION=6.2.3
ARG PYTHON_VERSION
ARG BASE_UBI_IMAGE_TAG

RUN printf "[amdgpu]\n\
name=amdgpu\n\
baseurl=https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/${BASE_UBI_IMAGE_TAG}/main/x86_64/\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key\n\
[ROCm-${ROCM_VERSION}]\n\
name=ROCm${ROCM_VERSION}\n\
baseurl=https://repo.radeon.com/rocm/rhel9/${ROCM_VERSION}/main\n\
enabled=1\n\
priority=50\n\
gpgcheck=1\n\
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key" > /etc/yum.repos.d/amdgpu.repo
ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8

# Some utils for dev purposes - tar required for kubectl cp
RUN microdnf install -y \
which procps findutils tar vim git\
&& microdnf clean all

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
uv pip install --pre \
--index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
torch==2.6.0.dev20241107+rocm${version}\
torchvision==0.20.0.dev20241107+rocm${version} && \
# Install libdrm-amdgpu to avoid errors when retrieving device information (amdgpu.ids: No such file or directory)
microdnf install -y libdrm-amdgpu && \
microdnf clean all

## Python Installer ############################################################
FROM base as python-install
ARG PYTHON_VERSION

ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/numpy.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/pillow.libs:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/triton/backends/amd/lib:$LD_LIBRARY_PATH"
ENV LD_LIBRARY_PATH="$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/lib:$LD_LIBRARY_PATH"
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf install -y \
python${PYTHON_VERSION}-devel && \
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all

RUN echo $LD_LIBRARY_PATH | tr : \\n >> /etc/ld.so.conf.d/torch-venv.conf && \
ldconfig

FROM rocm_base as rocm_devel
## CUDA Base ###################################################################
FROM python-install as cuda-base

ENV CCACHE_DIR=/root/.cache/ccache
RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
rpm -ql epel-release && \
microdnf -y update && \
microdnf -y install \
ccache \
git \
# packages required to build vllm
amd-smi \
hipblas-devel \
hipblaslt-devel \
hipcc \
hipcub-devel \
hipfft-devel \
hiprand-devel \
hipsolver-devel \
hipsparse-devel \
hsa-rocr-devel \
miopen-hip-devel \
rccl-devel \
rocblas-devel \
rocm-device-libs \
rocprim-devel \
rocrand-devel \
rocthrust-devel \
# end packages required to build vllm
wget \
which && \
RUN microdnf install -y \
cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
microdnf clean all

WORKDIR /workspace

ENV LLVM_SYMBOLIZER_PATH=/opt/rocm/llvm/bin/llvm-symbolizer
ENV PATH=$PATH:/opt/rocm/bin
ENV CPLUS_INCLUDE_PATH=$VIRTUAL_ENV/lib/python${PYTHON_VERSION}/site-packages/torch/include:/opt/rocm/include
ENV CUDA_HOME="/usr/local/cuda" \
PATH="${CUDA_HOME}/bin:${PATH}" \
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

## Python cuda base #################################################################
FROM cuda-base AS python-cuda-base

FROM rocm_devel AS build_amdsmi

# Build AMD SMI wheel
RUN cd /opt/rocm/share/amd_smi && \
python3 -m pip wheel . --wheel-dir=/install
ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

##################################################################################################
# install cuda and common dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
uv pip install \
-r requirements-cuda.txt

FROM rocm_devel AS build_flashattention

ARG FA_GFX_ARCHS="gfx90a;gfx942"
## Development #################################################################
FROM python-cuda-base AS dev

# the FA_BRANCH commit belongs to the ROCm/flash-attention fork, `main_perf` branch
ARG FA_BRANCH="3cea2fb"
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}
# install build and runtime dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
--mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
--mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
--mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
uv pip install \
-r requirements-cuda.txt \
-r requirements-dev.txt

RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/workspace/build \
mkdir -p /libs && \
cd /libs && \
git clone https://github.com/ROCm/flash-attention.git && \
cd flash-attention && \
git checkout ${FA_BRANCH} && \
git submodule update --init && \
uv pip install cmake ninja packaging && \
env \
GPU_ARCHS="${FA_GFX_ARCHS}" \
python3 setup.py bdist_wheel --dist-dir=/install
## Builder #####################################################################
FROM dev AS build

##################################################################################################
# install build dependencies
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
uv pip install -r requirements-build.txt

FROM rocm_devel AS build_vllm
ARG PYTORCH_ROCM_ARCH
ARG PYTHON_VERSION
ARG MAX_JOBS
ENV MAX_JOBS=${MAX_JOBS}
ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}
# install compiler cache to speed up compilation leveraging local or remote caching
# git is required for the cutlass kernels
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all

COPY . .

ENV VLLM_TARGET_DEVICE="rocm"
ENV MAX_JOBS=${MAX_JOBS}
# Make sure punica kernels are built (for LoRA)
ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
ARG vllm_fa_cmake_gpu_arches
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}

# max jobs used by Ninja to build extensions
ARG max_jobs=2
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads=8
ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1

# Make sure the cuda environment is in the PATH
ENV PATH=/usr/local/cuda/bin:$PATH

ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
uv pip install -v -U \
ninja setuptools-scm>=8 "cmake>=3.26" packaging && \
--mount=type=bind,src=.git,target=/workspace/.git \
env CFLAGS="-march=haswell" \
CXXFLAGS="$CFLAGS $CXXFLAGS" \
CMAKE_BUILD_TYPE=Release \
python3 setup.py bdist_wheel --dist-dir=dist
python3 setup.py bdist_wheel --dist-dir=dist

#################### libsodium Build IMAGE ####################
FROM rocm_base as libsodium-builder
FROM base as libsodium-builder

RUN microdnf install -y gcc gzip tar \
RUN microdnf install -y gcc gzip \
&& microdnf clean all

WORKDIR /usr/src/libsodium
Expand All @@ -174,44 +136,43 @@ RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM
&& rm -f libsodium*.tar.gz \
&& mv libsodium*/* ./

RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
./configure \
--prefix="/usr/" \
--libdir=/usr/lib64 && \
make -j $(nproc) && \
make check
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection"\
./configure --prefix="/usr/" && make -j $MAX_JOBS && make check

##################################################################################################

FROM rocm_base AS vllm-openai
ARG MAX_JOBS
## Release #####################################################################
FROM python-install AS vllm-openai
ARG PYTHON_VERSION

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# Required for triton
RUN microdnf install -y --setopt=install_weak_deps=0 --nodocs gcc rsync && \
microdnf clean all
# force using the python venv's cuda runtime libraries
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_nvrtc/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_runtime/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"

# Triton needs a CC compiler
RUN microdnf install -y gcc \
rsync \
&& microdnf clean all

# install vllm wheel first, so that torch etc will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
--mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
uv pip install "$(echo dist/*.whl)[tensorizer]" --verbose

# Install libsodium for Tensorizer encryption
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
cd /usr/src/libsodium \
&& make install

RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
--mount=type=bind,from=build_flashattention,src=/install,target=/install/flashattention \
--mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
--mount=type=cache,target=/root/.cache/pip \
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
export version="$(awk -F. '{print $1"."$2}' <<< $ROCM_VERSION)" && \
uv pip install \
--index-strategy=unsafe-best-match \
--extra-index-url "https://download.pytorch.org/whl/nightly/rocm${version}" \
/install/amdsmi/*.whl\
/install/flashattention/*.whl\
/install/vllm/*.whl
"https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp312-cp312-linux_x86_64.whl"

ENV HF_HUB_OFFLINE=1 \
HOME=/home/vllm \
Expand All @@ -222,20 +183,15 @@ ENV HF_HUB_OFFLINE=1 \
VLLM_USAGE_SOURCE=production-docker-image \
VLLM_WORKER_MULTIPROC_METHOD=fork \
VLLM_NO_USAGE_STATS=1 \
# Silences the HF Tokenizers warning
TOKENIZERS_PARALLELISM=false \
RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
VLLM_USE_TRITON_FLASH_ATTN=0 \
HIP_FORCE_DEV_KERNARG=1 \
OUTLINES_CACHE_DIR=/tmp/outlines \
NUMBA_CACHE_DIR=/tmp/numba \
TRITON_CACHE_DIR=/tmp/triton

# setup non-root user for OpenShift
RUN umask 002 && \
useradd --uid 2000 --gid 0 vllm && \
mkdir -p /licenses /home/vllm && \
chmod g+rwx /home/vllm
mkdir -p /home/vllm && \
chmod g+rwx /home/vllm /usr/src /workspace

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/
Expand All @@ -252,8 +208,11 @@ USER root

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.5.3
--mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]"

RUN --mount=type=cache,target=/root/.cache/pip \
pip install git+https://github.com/opendatahub-io/vllm-tgis-adapter@vllm0p6p5

ENV GRPC_PORT=8033 \
PORT=8000 \
Expand Down
5 changes: 4 additions & 1 deletion Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,10 @@ USER root
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.5.3
HOME=/root uv pip install "$(echo /workspace/dist/*.whl)[tensorizer]"

RUN --mount=type=cache,target=/root/.cache/pip \
pip install git+https://github.com/opendatahub-io/vllm-tgis-adapter@vllm0p6p5

ENV GRPC_PORT=8033 \
PORT=8000 \
Expand Down

0 comments on commit 3f74522

Please sign in to comment.