forked from ptrckqnln/runpod-worker-oobabooga
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.Standalone
50 lines (42 loc) · 1.54 KB
/
Dockerfile.Standalone
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Model repo to bake into the image (exl2-quantized Mixtral by default);
# override at build time with --build-arg MODEL=...
ARG MODEL="LoneStriker/Air-Striker-Mixtral-8x7B-Instruct-ZLoss-3.75bpw-h6-exl2"

# DEBIAN_FRONTEND is a build-time-only concern: declare it as ARG so it
# does not leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

ENV PIP_PREFER_BINARY=1 \
    PYTHONUNBUFFERED=1

# Fail pipelines on the first failing command (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install OS-level build dependencies.
# - apt-get (not apt): stable CLI intended for scripts (hadolint DL3027)
# - --no-install-recommends keeps the layer lean (DL3015)
# - apt list cleanup happens in the same layer so the cache never ships
#   in the image
# NOTE(review): dropped "apt upgrade -y" (DL3005) — prefer bumping the
# base-image tag/digest to pick up security updates reproducibly.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        git-lfs \
        python3-dev \
        python3-venv && \
    rm -rf /var/lib/apt/lists/*
# 1. Clone the text-generation-webui repo
# 2. Create and activate a Python venv inside it
# 3. Install CUDA 11.8 PyTorch wheels, then webui and extension requirements
# NOTE(review): the clone is unpinned (tracks upstream HEAD) — consider
# pinning a commit/tag for reproducible builds.
WORKDIR /workspace
RUN git clone https://github.com/oobabooga/text-generation-webui && \
    cd text-generation-webui && \
    python3 -m venv venv && \
    source venv/bin/activate && \
    # --no-cache-dir (DL3042): pip's wheel cache would otherwise be baked
    # into this layer
    pip3 install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && \
    pip3 install --no-cache-dir -r requirements.txt && \
    # SHELL is already bash, so no "bash -c" wrapper is needed for the loop
    for req in extensions/*/requirements.txt ; do pip3 install --no-cache-dir -r "$req" ; done && \
    deactivate
# Fetch the model into the webui models directory using the build-time
# venv. download_model.py is copied alongside because fetch_model.py
# presumably imports it — TODO confirm.
# NOTE(review): huggingface_hub/runpod are unpinned — consider pinning
# versions for reproducible builds.
COPY download_model.py fetch_model.py /
RUN source /workspace/text-generation-webui/venv/bin/activate && \
    pip3 install --no-cache-dir huggingface_hub runpod && \
    # quote ${MODEL} so build args containing spaces/globs don't word-split
    /fetch_model.py "${MODEL}" /workspace/text-generation-webui/models
# Docker container start script. --chmod sets the execute bit at copy time,
# replacing the separate "RUN chmod" layer (requires BuildKit, the default
# builder since Docker 23).
COPY --chmod=755 start_standalone.sh /start.sh
COPY rp_handler.py /
COPY schemas /schemas

# Exec form (DL3025): /start.sh runs as PID 1 and receives SIGTERM on
# "docker stop" instead of being wrapped in "/bin/sh -c".
# NOTE(review): image still runs as root — common for RunPod GPU workers,
# but consider adding a non-root USER if the handler does not require root.
CMD ["/start.sh"]