Feature dockerfile for cortex (#1014)

Co-authored-by: Hien To <[email protected]>
janhq · Aug 13, 2024 · f38d9dc · f38d9dc
1 parent 2562ac8
commit f38d9dc
Show file tree

Hide file tree

Showing 12 changed files with 234 additions and 50 deletions.
diff --git a/Dockerfile b/Dockerfile
diff --git a/docker-compose.yml b/docker-compose.yml
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -0,0 +1,36 @@
+# Choose a base image whose CUDA version is compatible with the NVIDIA driver installed on the host.
+# Run `nvidia-smi` to see the driver version and the highest CUDA version it supports.
+# Then pick the matching tag from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
+
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS base
+
+# 1. Toolchain stage: GCC 11, Node.js 20 and yarn on top of the CUDA runtime image
+FROM base AS devel
+
+# Install GCC/G++ 11, build helpers and Node.js 20 in a single layer.
+# - apt-get is used instead of apt because apt's CLI is not meant for scripts.
+# - DEBIAN_FRONTEND is set inline so no package can block the build on a prompt
+#   and the setting does not leak into the runtime environment.
+# - The NodeSource setup script is downloaded to a file and then executed, so a
+#   failed download aborts the build instead of piping empty input into bash
+#   and silently falling back to the distribution's nodejs package.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        cpp-11 \
+        curl \
+        g++-11 \
+        gcc-11 \
+        gnupg \
+        jq \
+        make \
+        python3-dev \
+        xsel && \
+    curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
+    bash /tmp/nodesource_setup.sh && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y nodejs && \
+    rm -rf /tmp/nodesource_setup.sh /var/lib/apt/lists/*
+
+# Register the version-suffixed GCC 11 binaries as the default gcc/g++/gcov/gcc-ar/gcc-ranlib/cpp
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
+                         --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
+                         --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
+                         --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
+                         --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
+    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110
+
+# yarn is installed globally through the npm that ships with the NodeSource nodejs package
+RUN npm install -g yarn
+
+WORKDIR /app
+
+# 2. Runtime stage: adds the entry script and container metadata on top of the toolchain stage
+FROM devel AS release
+
+# Entry script; the execute bit is set explicitly because the script is run directly by path
+COPY ./common/entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x /usr/local/bin/entrypoint.sh
+
+# Port probed by the health check below (documentation only; publish it with `docker run -p`)
+EXPOSE 1337
+
+# Probe the API every 5 minutes; curl -f turns HTTP error statuses into a failing exit code
+HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \
+  CMD curl -f http://127.0.0.1:1337/api/system || exit 1
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/docker/Dockerfile.firewall b/docker/Dockerfile.firewall
@@ -0,0 +1,45 @@
+# Choose a base image whose CUDA version is compatible with the NVIDIA driver installed on the host.
+# Run `nvidia-smi` to see the driver version and the highest CUDA version it supports.
+# Then pick the matching tag from https://catalog.ngc.nvidia.com/orgs/nvidia/containers/cuda/tags
+
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04 AS base
+
+# 1. Toolchain stage: GCC 11, Node.js 20, yarn, dnsmasq and nginx on top of the CUDA runtime image
+FROM base AS devel
+
+# Install GCC/G++ 11, build helpers, the firewall components (dnsmasq, nginx,
+# iproute2) and Node.js 20 in a single layer.
+# - apt-get is used instead of apt because apt's CLI is not meant for scripts.
+# - DEBIAN_FRONTEND is set inline so no package can block the build on a prompt
+#   and the setting does not leak into the runtime environment.
+# - The NodeSource setup script is downloaded to a file and then executed, so a
+#   failed download aborts the build instead of piping empty input into bash
+#   and silently falling back to the distribution's nodejs package.
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        cpp-11 \
+        curl \
+        dnsmasq \
+        g++-11 \
+        gcc-11 \
+        gnupg \
+        iproute2 \
+        jq \
+        make \
+        nginx \
+        python3-dev \
+        xsel && \
+    curl -fsSL https://deb.nodesource.com/setup_20.x -o /tmp/nodesource_setup.sh && \
+    bash /tmp/nodesource_setup.sh && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y nodejs && \
+    rm -rf /tmp/nodesource_setup.sh /var/lib/apt/lists/*
+
+# Register the version-suffixed GCC 11 binaries as the default gcc/g++/gcov/gcc-ar/gcc-ranlib/cpp
+RUN update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 110 \
+                         --slave /usr/bin/g++ g++ /usr/bin/g++-11 \
+                         --slave /usr/bin/gcov gcov /usr/bin/gcov-11 \
+                         --slave /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 \
+                         --slave /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 && \
+    update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 110
+
+# yarn is installed globally through the npm that ships with the NodeSource nodejs package
+RUN npm install -g yarn
+
+# Directory that receives blocked-domains.txt in the release stage
+RUN mkdir -p /etc/dnsmasq.d/
+
+WORKDIR /app
+
+# 2. Runtime stage: adds the firewall configuration, scripts and container metadata
+FROM devel AS release
+
+# Port the bundled nginx.conf listens on (documentation only; publish it with `docker run -p`)
+EXPOSE 80
+
+# dnsmasq: main configuration plus the list of domains to block
+COPY ./common/dnsmasq.conf /etc/dnsmasq.conf
+COPY ./common/blocked-domains.txt /etc/dnsmasq.d/blocked-domains.txt
+
+COPY ./common/entrypoint-firewall.sh /usr/local/bin/entrypoint.sh
+
+# nginx: route rules, the script that turns them into location blocks, and the main configuration
+COPY ./common/routes.txt /app/routes.txt
+COPY ./common/generate_nginx_conf.sh /usr/local/bin/generate_nginx_conf.sh
+COPY ./common/nginx.conf /etc/nginx/nginx.conf
+
+# Both scripts are executed directly by path (the entrypoint invokes
+# generate_nginx_conf.sh), so both need the execute bit regardless of the
+# file mode they were checked out with.
+RUN chmod +x /usr/local/bin/entrypoint.sh /usr/local/bin/generate_nginx_conf.sh
+
+# Probe the API through nginx every 5 minutes; curl -f turns HTTP error statuses into a failing exit code
+HEALTHCHECK --interval=300s --timeout=30s --start-period=10s --retries=3 \
+  CMD curl -f http://127.0.0.1/api/system || exit 1
+
+ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
diff --git a/docker/README.md b/docker/README.md
@@ -0,0 +1,46 @@
+# Docker with cortex
+
+We offer two methods for deploying the Cortex environment on Docker.
+
+## Method 1: Use the default Dockerfile with Cortex.
+
+To use this method, you need to follow these steps:
+```bash
+git clone https://github.com/janhq/cortex.git
+cd cortex/docker
+docker build -t cortex:latest .
+
+# Run the container with GPU support
+docker run -it --gpus all -d -p 1337:1337 cortex:latest
+
+# Run the container with CPU support
+docker run -it -d -p 1337:1337 cortex:latest
+
+# After starting, you can access Swagger at http://localhost:1337/api and the API server at http://localhost:1337.
+# Additionally, you can exec into the container and use cortex-cli to perform other operations.
+```
+
+## Method 2: Use Dockerfile.firewall with the feature to block outbound connections by domain and block inbound connections by API path.
+
+Use this method when you want to host the Cortex API fully offline, so that it cannot reach remote model providers such as the OpenAI API. You can also use it to block inbound requests to specific API paths, for example to stop clients from loading models through `/v1/models/start`.
+
+To use this method, you need to follow these steps:
+
+- Step 1: Edit the contents of the [blocked-domains.txt](./common/blocked-domains.txt) file according to your requirements. Refer to the provided examples in the file. The goal is to block outbound connections to the domains you do not want to allow.
+- Step 2: Edit the contents of the [routes.txt](./common/routes.txt) file according to your requirements. Each line has the form `<path> <action>`, for example `/v1/models/start deny`; any action other than `allow` is treated as deny. The goal is to block inbound connections to the paths you do not want to allow.
+- Step 3: Build the image with Dockerfile.firewall following the instructions below:
+
+    ```bash
+    git clone https://github.com/janhq/cortex.git
+    cd cortex/docker
+    docker build -f Dockerfile.firewall -t cortex-with-firewall:latest .
+
+    # Run the container with GPU support (nginx inside the container listens on port 80)
+    docker run -it --gpus all -d -p 1337:80 cortex-with-firewall:latest
+
+    # Run the container with CPU support
+    docker run -it -d -p 1337:80 cortex-with-firewall:latest
+
+    # After starting, you can access Swagger at http://localhost:1337/api and the API server at http://localhost:1337.
+    # Additionally, you can exec into the container and use cortex-cli to perform other operations.
+    ```
diff --git a/docker/common/blocked-domains.txt b/docker/common/blocked-domains.txt
@@ -0,0 +1,5 @@
+# Block IPv4 for openai.com and all of its subdomains (*.openai.com)
+# by answering A queries with the unroutable address 0.0.0.0.
+address=/openai.com/0.0.0.0
+
+# Block IPv6 for openai.com and all of its subdomains (*.openai.com)
+# by answering AAAA queries with the unspecified address "::".
+address=/openai.com/::
diff --git a/docker/common/dnsmasq.conf b/docker/common/dnsmasq.conf
@@ -0,0 +1,5 @@
+# Do not take upstream servers from /etc/resolv.conf
+# (the firewall entrypoint points that file at 127.0.0.1, i.e. at dnsmasq itself).
+no-resolv
+
+# Upstream resolver for every name that is not overridden by the block list
+server=8.8.8.8
+
+# Per-domain overrides that implement the outbound block list
+conf-file=/etc/dnsmasq.d/blocked-domains.txt
diff --git a/docker/common/entrypoint-firewall.sh b/docker/common/entrypoint-firewall.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# Setup DNS resolution with dnsmasq
+echo "nameserver 127.0.0.1" > /etc/resolv.conf
+dnsmasq -k &
+
+# Generate Nginx configuration from routes.txt
+/usr/local/bin/generate_nginx_conf.sh
+
+# Install cortex
+npm install -g cortexso
+
+# Start cortex
+cortex -a 127.0.0.1
+
+cortex engines llamacpp init
+cortex engines tensorrt-llm init
+
+# Start nginx
+nginx -g 'daemon off;' &
+
+# Keep the container running by tailing the log file
+tail -f /root/cortex/cortex.log
diff --git a/docker/common/entrypoint.sh b/docker/common/entrypoint.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+npm install -g cortexso
+# Run cortex
+cortex  -a 0.0.0.0
+
+cortex engines llamacpp init
+cortex engines tensorrt-llm init
+
+# Keep the container running by tailing the log file
+tail -f /root/cortex/cortex.log
diff --git a/docker/common/generate_nginx_conf.sh b/docker/common/generate_nginx_conf.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+NGINX_CONF="/etc/nginx/conf.d/generated_routes.conf"
+
+rm -f $NGINX_CONF
+
+touch $NGINX_CONF
+
+while IFS= read -r line || [ -n "$line" ]
+do
+    route=$(echo $line | awk '{print $1}')
+    action=$(echo $line | awk '{print $2}')
+
+    echo "location $route {" >> $NGINX_CONF
+    if [ "$action" = "allow" ]; then
+        echo "    allow all;" >> $NGINX_CONF
+    else
+        echo "    deny all;" >> $NGINX_CONF
+    fi
+    echo "}" >> $NGINX_CONF
+done < /app/routes.txt
diff --git a/docker/common/nginx.conf b/docker/common/nginx.conf
@@ -0,0 +1,41 @@
+# nginx configuration for the firewall image: a single server on port 80 that
+# applies the generated per-route allow/deny rules and proxies everything else
+# to the backend on 127.0.0.1:1337.
+worker_processes  auto;
+
+error_log  /var/log/nginx/error.log notice;
+pid        /var/run/nginx.pid;
+
+events {
+    worker_connections  1024;
+}
+
+http {
+    include       /etc/nginx/mime.types;
+    default_type  application/octet-stream;
+
+    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
+                      '$status $body_bytes_sent "$http_referer" '
+                      '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log  /var/log/nginx/access.log  main;
+
+    sendfile        on;
+    tcp_nopush      on;
+    tcp_nodelay     on;
+    keepalive_timeout  65;
+    types_hash_max_size 2048;
+
+    server {
+        listen 80 default_server;
+        listen [::]:80 default_server;
+
+        server_name _;
+
+        # Proxy tuning, set at server level so every proxied location inherits it:
+        # - HTTP/1.1 to the backend so chunked (streamed) responses pass through
+        # - no response buffering, so streamed output reaches the client as it is produced
+        # - a read timeout above the 60s default, so slow responses are not cut off with a 504
+        proxy_http_version 1.1;
+        proxy_buffering off;
+        proxy_read_timeout 600s;
+
+        # Include the generated routes configuration (location blocks only)
+        include /etc/nginx/conf.d/generated_routes.conf;
+
+        # Default location block (catch-all)
+        location / {
+            proxy_pass http://127.0.0.1:1337;  # Forward to the backend service
+            allow all;  # Default to allow all requests
+        }
+    }
+}
diff --git a/docker/common/routes.txt b/docker/common/routes.txt
@@ -0,0 +1 @@
+/v1/models/start deny