forked from ptrckqnln/runpod-worker-oobabooga
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile.Standalone
50 lines (42 loc) · 1.54 KB
/
Dockerfile.Standalone
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Model repo to bake into the image (exl2-quantized Mixtral by default);
# override at build time with --build-arg MODEL=...
ARG MODEL="LoneStriker/Air-Striker-Mixtral-8x7B-Instruct-ZLoss-3.75bpw-h6-exl2"

# DEBIAN_FRONTEND is a build-time-only concern: declare it as ARG so it
# does not leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

ENV PIP_PREFER_BINARY=1 \
    PYTHONUNBUFFERED=1

# Fail pipelines on the first failing command (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Install OS-level build dependencies.
# - apt-get (not apt): stable CLI intended for scripts (hadolint DL3027)
# - --no-install-recommends keeps the layer lean (DL3015)
# - apt list cleanup happens in the same layer so the cache never ships
#   in the image
# NOTE(review): dropped "apt upgrade -y" (DL3005) — prefer bumping the
# base-image tag/digest to pick up security updates reproducibly.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        git-lfs \
        python3-dev \
        python3-venv && \
    rm -rf /var/lib/apt/lists/*
# 1. Clone the text-generation-webui repo
# 2. Create and activate a Python venv inside it
# 3. Install CUDA 11.8 PyTorch wheels, then webui and extension requirements
# NOTE(review): the clone is unpinned (tracks upstream HEAD) — consider
# pinning a commit/tag for reproducible builds.
WORKDIR /workspace
RUN git clone https://github.com/oobabooga/text-generation-webui && \
    cd text-generation-webui && \
    python3 -m venv venv && \
    source venv/bin/activate && \
    # --no-cache-dir (DL3042): pip's wheel cache would otherwise be baked
    # into this layer
    pip3 install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && \
    pip3 install --no-cache-dir -r requirements.txt && \
    # SHELL is already bash, so no "bash -c" wrapper is needed for the loop
    for req in extensions/*/requirements.txt ; do pip3 install --no-cache-dir -r "$req" ; done && \
    deactivate
# Fetch the model into the webui models directory using the build-time
# venv. download_model.py is copied alongside because fetch_model.py
# presumably imports it — TODO confirm.
# NOTE(review): huggingface_hub/runpod are unpinned — consider pinning
# versions for reproducible builds.
COPY download_model.py fetch_model.py /
RUN source /workspace/text-generation-webui/venv/bin/activate && \
    pip3 install --no-cache-dir huggingface_hub runpod && \
    # quote ${MODEL} so build args containing spaces/globs don't word-split
    /fetch_model.py "${MODEL}" /workspace/text-generation-webui/models
# Docker container start script. --chmod sets the execute bit at copy time,
# replacing the separate "RUN chmod" layer (requires BuildKit, the default
# builder since Docker 23).
COPY --chmod=755 start_standalone.sh /start.sh
COPY rp_handler.py /
COPY schemas /schemas

# Exec form (DL3025): /start.sh runs as PID 1 and receives SIGTERM on
# "docker stop" instead of being wrapped in "/bin/sh -c".
# NOTE(review): image still runs as root — common for RunPod GPU workers,
# but consider adding a non-root USER if the handler does not require root.
CMD ["/start.sh"]