-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature dockerfile for cortex (#1014)
Co-authored-by: Hien To <[email protected]>
- Loading branch information
1 parent
2562ac8
commit f38d9dc
Showing
12 changed files
with
234 additions
and
50 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Pick the CUDA base image that matches the NVIDIA driver installed on the host:
# run `nvidia-smi` to see the driver version and the CUDA version it supports, then choose
# a matching tag from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags

FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS base

# Toolchain stage: compilers, helper utilities and Node.js
FROM base AS devel

# Install the GCC 11 toolchain, helper utilities and Node.js 20 (from NodeSource).
# - apt-get is used instead of apt, whose command-line interface is not meant for scripts.
# - The NodeSource setup script is downloaded to a file with `curl -f` before it is executed,
#   so an HTTP error fails the build at this step instead of piping an empty or error page
#   into bash and silently continuing.
# - DEBIAN_FRONTEND is set inline only, so it does not leak into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cpp-11 \
        curl \
        g++-11 \
        gcc-11 \
        gnupg \
        jq \
        make \
        python3-dev \
        xsel && \
    curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
    bash /tmp/nodesource_setup.sh && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y nodejs && \
    rm -f /tmp/nodesource_setup.sh && \
    rm -rf /var/lib/apt/lists/*

# Register GCC 11 (and its companion tools) as the default gcc/g++/cpp
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
        --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
        --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
        --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110

RUN npm install -g yarn

WORKDIR /app

FROM devel AS release

# Port the health check below probes on localhost
EXPOSE 1337

COPY ./common/entrypoint.sh /usr/local/bin/entrypoint.sh

RUN chmod +x /usr/local/bin/entrypoint.sh

# Probe the local /api/system endpoint every 5 minutes; 3 consecutive failures mark the
# container as unhealthy
HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \
    CMD curl -f http://127.0.0.1:1337/api/system || exit 1

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Pick the CUDA base image that matches the NVIDIA driver installed on the host:
# run `nvidia-smi` to see the driver version and the CUDA version it supports, then choose
# a matching tag from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags

FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS base

# Toolchain stage: compilers, helper utilities, Node.js, dnsmasq and nginx
FROM base AS devel

# Install the GCC 11 toolchain, helper utilities, dnsmasq, nginx and Node.js 20 (NodeSource).
# - apt-get is used instead of apt, whose command-line interface is not meant for scripts.
# - The NodeSource setup script is downloaded to a file with `curl -f` before it is executed,
#   so an HTTP error fails the build at this step instead of piping an empty or error page
#   into bash and silently continuing.
# - DEBIAN_FRONTEND is set inline only, so it does not leak into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cpp-11 \
        curl \
        dnsmasq \
        g++-11 \
        gcc-11 \
        gnupg \
        iproute2 \
        jq \
        make \
        nginx \
        python3-dev \
        xsel && \
    curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
    bash /tmp/nodesource_setup.sh && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y nodejs && \
    rm -f /tmp/nodesource_setup.sh && \
    rm -rf /var/lib/apt/lists/*

# Register GCC 11 (and its companion tools) as the default gcc/g++/cpp
RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
        --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
        --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
        --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
        --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110

RUN npm install -g yarn

# Directory that receives the blocked-domains list copied below
RUN mkdir -p /etc/dnsmasq.d/

WORKDIR /app

FROM devel AS release

# nginx listens on port 80 (see nginx.conf)
EXPOSE 80

# Outbound filtering: dnsmasq configuration and the list of blocked domains
COPY ./common/dnsmasq.conf /etc/dnsmasq.conf
COPY ./common/blocked-domains.txt /etc/dnsmasq.d/blocked-domains.txt

COPY ./common/entrypoint-firewall.sh /usr/local/bin/entrypoint.sh

# Inbound filtering: route table, the script that turns it into nginx rules, and nginx.conf
COPY ./common/routes.txt /app/routes.txt
COPY ./common/generate_nginx_conf.sh /usr/local/bin/generate_nginx_conf.sh
COPY ./common/nginx.conf /etc/nginx/nginx.conf

# Both scripts are executed directly by path, so both need the executable bit
# (generate_nginx_conf.sh is invoked from the entrypoint).
RUN chmod +x /usr/local/bin/entrypoint.sh /usr/local/bin/generate_nginx_conf.sh

# Probe /api/system on port 80 every 5 minutes; 3 consecutive failures mark the container
# as unhealthy
HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \
    CMD curl -f http://127.0.0.1/api/system || exit 1

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Docker with cortex | ||
|
||
We offer two methods for deploying the Cortex environment on Docker. | ||
|
||
## Method 1: Use the default Dockerfile with Cortex. | ||
|
||
To use this method, you need to follow these steps: | ||
```bash | ||
git clone https://github.com/janhq/cortex.git | ||
cd cortex/docker | ||
docker build -t cortex:latest . | ||
|
||
# Run the container with GPU support | ||
docker run -it --gpus all -d -p 1337:1337 cortex:latest | ||
|
||
# Run the container with CPU support | ||
docker run -it -d -p 1337:1337 cortex:latest | ||
|
||
# After starting, you can access Swagger at http://localhost:1337/api and the API server at http://localhost:1337. | ||
# Additionally, you can exec into the container and use cortex-cli to perform other operations. | ||
``` | ||
|
||
## Method 2: Use Dockerfile.firewall with the feature to block outbound connections by domain and block inbound connections by API path. | ||
|
||
The use case for this method is when you want to host the Cortex API 100% offline, preventing access to remote models like the OpenAI API. Alternatively, you might want to block inbound connections by restricting clients from calling the API to load models `/v1/models/start`. | ||
|
||
To use this method, you need to follow these steps: | ||
|
||
- Step 1: Edit the contents of the [blocked-domains.txt](./docker/common/blocked-domains.txt) file according to your requirements. Refer to the provided examples in the file. The goal is to block outbound connections to the domains you do not want to allow. | ||
- Step 2: Edit the contents of the [routes.txt](./docker/common/routes.txt) file according to your requirements. Each line has the form `<path> <allow|deny>`; refer to the provided example in the file. The goal is to block inbound connections to the paths you do not want to allow. | ||
- Step 3: Build the image with Dockerfile.firewall following the instructions below: | ||
|
||
```bash | ||
git clone https://github.com/janhq/cortex.git | ||
cd cortex/docker | ||
docker build -f Dockerfile.firewall -t cortex-with-firewall:latest . | ||
|
||
# Run the container with GPU support | ||
docker run -it --gpus all -d -p 1337:80 cortex-with-firewall:latest | ||
|
||
# Run the container with CPU support | ||
docker run -it -d -p 1337:80 cortex-with-firewall:latest | ||
|
||
# After starting, you can access Swagger at http://localhost:1337/api and the API server at http://localhost:1337. | ||
# Additionally, you can exec into the container and use cortex-cli to perform other operations. | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# openai.com and every subdomain (*.openai.com): answer IPv4 lookups with 0.0.0.0
address=/openai.com/0.0.0.0

# openai.com and every subdomain (*.openai.com): answer IPv6 lookups with ::
address=/openai.com/::
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Do not read upstream servers from /etc/resolv.conf; use only the server listed below
no-resolv

# Upstream DNS server for every name that is not overridden
server=8.8.8.8

# Per-domain overrides (blocked domains)
conf-file=/etc/dnsmasq.d/blocked-domains.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/sh
# Container entrypoint for the firewall image.
# Starts dnsmasq for DNS resolution, generates the nginx route rules, installs and starts
# cortex on 127.0.0.1, then starts nginx and follows the cortex log.

# Point the resolver at the local dnsmasq instance and start it in the background
echo "nameserver 127.0.0.1" > /etc/resolv.conf
dnsmasq -k &

# Generate the nginx configuration from routes.txt.
# nginx.conf includes the generated file, so there is no point in continuing without it:
# stop with a clear message instead of leaving a container that runs without nginx.
if ! /usr/local/bin/generate_nginx_conf.sh; then
    echo "entrypoint: failed to generate the nginx route configuration" >&2
    exit 1
fi

# Install cortex
npm install -g cortexso

# Start cortex, bound to 127.0.0.1
cortex -a 127.0.0.1

# Initialise the engines
for engine in llamacpp tensorrt-llm; do
    cortex engines "$engine" init
done

# Start nginx in the background
nginx -g 'daemon off;' &

# Keep the container running by tailing the log file
tail -f /root/cortex/cortex.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/sh
# Container entrypoint for the default image: install cortex, start it on all interfaces,
# initialise the engines and then follow the cortex log.

# Install the cortex CLI at container start
npm install -g cortexso

# Start cortex, bound to 0.0.0.0
cortex -a 0.0.0.0

# Initialise each engine in turn
for engine in llamacpp tensorrt-llm; do
    cortex engines "$engine" init
done

# Following the log keeps the script, and therefore the container, in the foreground
tail -f /root/cortex/cortex.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/sh
# Generate nginx `location` blocks from the route table in /app/routes.txt.
#
# Each non-empty line of the route table has the form:
#     <route> <action>
# A route whose action is "allow" is opened and forwarded to the backend; any other action
# (including a missing one) produces "deny all". Blank lines and lines starting with '#'
# are skipped, so they can no longer produce an invalid `location  {` block.

NGINX_CONF="/etc/nginx/conf.d/generated_routes.conf"
ROUTES_FILE="/app/routes.txt"
# Must match the proxy_pass target of the catch-all location in nginx.conf
BACKEND="http://127.0.0.1:1337"

# Start from an empty file on every run
mkdir -p "$(dirname "$NGINX_CONF")"
: > "$NGINX_CONF"

# `read` splits each line into fields itself, so the route is never exposed to globbing or
# word splitting. The `|| [ -n "$route" ]` part keeps a last line without trailing newline.
while read -r route action rest || [ -n "$route" ]
do
    case "$route" in
        ''|'#'*) continue ;;
    esac

    {
        echo "location $route {"
        if [ "$action" = "allow" ]; then
            echo "    allow all;"
            # A location block does not inherit proxy_pass from the catch-all `location /`,
            # so an allowed route has to forward to the backend explicitly.
            echo "    proxy_pass $BACKEND;"
        else
            echo "    deny all;"
        fi
        echo "}"
    } >> "$NGINX_CONF"
done < "$ROUTES_FILE"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Main nginx configuration: a single HTTP server on port 80 that applies the generated
# per-route rules and forwards everything else to the backend on 127.0.0.1:1337.

worker_processes auto;

error_log /var/log/nginx/error.log notice;
pid /var/run/nginx.pid;

events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Access log format
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;

    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;

    server {
        # Default server for IPv4 and IPv6 on port 80
        listen 80 default_server;
        listen [::]:80 default_server;

        server_name _;

        # Per-route location blocks written by generate_nginx_conf.sh
        include /etc/nginx/conf.d/generated_routes.conf;

        # Catch-all: any request not matched by a generated location
        location / {
            proxy_pass http://127.0.0.1:1337;  # Forward to the backend service
            allow all;                         # Allow every client by default
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/v1/models/start deny |