diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index 3ba7577c4fe71..c5e510f060342 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -229,6 +229,58 @@ WORKDIR /usr/src/flash-attention-v2
 RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
     --no-build-isolation --no-deps --no-cache-dir
 
+
+## Test ########################################################################
+FROM dev AS test
+
+WORKDIR /vllm-workspace
+# COPY the whole build context, keeping its directory structure
+# NB: Could leak secrets from local context, the test image should not be pushed
+# to a registry
+COPY . /vllm-workspace/
+# copy pytorch extensions separately to avoid having to rebuild
+# when python code changes
+COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
+# Install flash attention (from pre-built wheel)
+RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
+    pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
+# ignore build dependencies installation because we are using pre-compiled extensions
+RUN rm pyproject.toml
+RUN --mount=type=cache,target=/root/.cache/pip \
+    VLLM_USE_PRECOMPILED=1 pip install . --verbose
+
+
+## Proto Compilation ###########################################################
+FROM python-base AS gen-protos
+
+RUN microdnf install -y \
+        findutils \
+        make \
+    && microdnf clean all
+# Makefile and proto/ are bind-mounted from the build context, not copied
+RUN --mount=type=bind,source=Makefile,target=Makefile \
+    --mount=type=bind,source=proto,target=proto \
+    --mount=type=cache,target=/root/.cache/pip \
+    make gen-protos
+
+## vLLM Library Files ##########################################################
+# Small staging stage: gather the library files and set their permissions here,
+# so permission changes do not duplicate files in the release layer
+FROM base AS vllm
+
+WORKDIR /vllm-staging
+# Collect files from the build context and earlier stages into the staging dir
+COPY vllm vllm
+COPY --from=build /workspace/vllm/*.so vllm/
+COPY --from=gen-protos /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
+
+# Copy with `cp` under umask 002 (rather than COPY) to control permissions and
+# grant permissions to the group; the chmod is not strictly needed, but .so
+# files typically have executable bits
+RUN umask 002 \
+    && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
+    && chmod +x /workspace/vllm/*.so
+
 ## Release #####################################################################
 # Note from the non-UBI Dockerfile:
 # We used base cuda image because pytorch installs its own cuda libraries.