Skip to content

fix: support ctx_len for model start cli #1618

fix: support ctx_len for model start cli

fix: support ctx_len for model start cli #1618

name: CI Quality Gate Cortex CPP
on:
pull_request:
types: [opened, synchronize, reopened, ready_for_review]
paths: ["engine/**", ".github/workflows/cortex-cpp-quality-gate.yml"]
workflow_dispatch:
schedule:
- cron: '0 22 * * *'
env:
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
jobs:
build-and-test:
runs-on: ${{ matrix.runs-on }}
timeout-minutes: 60
strategy:
fail-fast: false
matrix:
include:
- os: "linux"
name: "amd64"
runs-on: "ubuntu-20-04-cuda-12-0"
cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
build-deps-cmake-flags: ""
ccache-dir: ""
- os: "mac"
name: "amd64"
runs-on: "macos-selfhosted-12"
cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
build-deps-cmake-flags: ""
ccache-dir: ""
- os: "mac"
name: "arm64"
runs-on: "macos-silicon"
cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DMAC_ARM64=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake"
build-deps-cmake-flags: ""
ccache-dir: ""
- os: "windows"
name: "amd64"
runs-on: "windows-cuda-12-0"
cmake-flags: "-DCORTEX_CPP_VERSION=${{github.event.pull_request.head.sha}} -DCMAKE_BUILD_TEST=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_TOOLCHAIN_FILE=C:/w/cortex.cpp/cortex.cpp/engine/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows-static -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
build-deps-cmake-flags: "-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache -GNinja"
ccache-dir: 'C:\Users\ContainerAdministrator\AppData\Local\ccache'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
submodules: recursive
- name: use python
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install tools on Linux
if: runner.os == 'Linux'
run: |
python3 -m pip install awscli
- name: Install choco on Windows
if: runner.os == 'Windows'
run: |
choco install make pkgconfiglite ccache awscli 7zip ninja -y
- name: Download ccache from s3
if: runner.os == 'Windows'
continue-on-error: true
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-${{ matrix.os }}-${{ matrix.name }} ${{ matrix.ccache-dir }} --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-windows C:\Users\ContainerAdministrator\AppData\Local\vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
- name: Download vcpkg cache from s3
if: runner.os == 'Linux'
continue-on-error: true
run: |
aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
- name: Configure vcpkg
run: |
cd engine
make configure-vcpkg
- name: Build
run: |
cd engine
make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}"
- name: Run setup config
run: |
cd engine
echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" > ~/.cortexrc
echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc
# ./build/cortex
cat ~/.cortexrc
- name: Run unit tests
run: |
cd engine
make run-unit-tests
env:
GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- name: Run setup config
run: |
cd engine
echo "apiServerPort: 3928" > ~/.cortexrc
echo "huggingFaceToken: ${{ secrets.HUGGINGFACE_TOKEN_READ }}" >> ~/.cortexrc
echo "gitHubToken: ${{ secrets.PAT_SERVICE_ACCOUNT }}" >> ~/.cortexrc
# ./build/cortex
cat ~/.cortexrc
- name: Run e2e tests
if: runner.os != 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex build/cortex-nightly
cp build/cortex build/cortex-beta
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
python e2e-test/main.py
rm build/cortex-nightly
rm build/cortex-beta
env:
GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- name: Run e2e tests
if: runner.os == 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex.exe build/cortex-nightly.exe
cp build/cortex.exe build/cortex-beta.exe
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
python e2e-test/main.py
rm build/cortex-nightly.exe
rm build/cortex-beta.exe
env:
GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- name: Run e2e tests
if: github.event_name == 'schedule' && runner.os != 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex build/cortex-nightly
cp build/cortex build/cortex-beta
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
python e2e-test/cortex-llamacpp-e2e-nightly.py
rm build/cortex-nightly
rm build/cortex-beta
env:
GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- name: Run e2e tests
if: github.event_name == 'schedule' && runner.os == 'Windows' && github.event.pull_request.draft == false
run: |
cd engine
cp build/cortex.exe build/cortex-nightly.exe
cp build/cortex.exe build/cortex-beta.exe
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
python e2e-test/cortex-llamacpp-e2e-nightly.py
rm build/cortex-nightly.exe
rm build/cortex-beta.exe
env:
GITHUB_TOKEN: ${{ secrets.PAT_SERVICE_ACCOUNT }}
- name: Pre-package
run: |
cd engine
make pre-package DESTINATION_BINARY_NAME="cortex"
- name: Package
run: |
cd engine
make package
- name: Upload Artifact
uses: actions/upload-artifact@v4
with:
name: cortex-${{ matrix.os }}-${{ matrix.name }}
path: ./engine/cortex
- name: Upload windows ccache to s3
continue-on-error: true
if: always() && runner.os == 'Windows'
run: |
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
aws s3 sync ${{ matrix.ccache-dir }} s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-${{ matrix.os }}-${{ matrix.name }} --endpoint ${{ secrets.MINIO_ENDPOINT }}
aws s3 sync C:\Users\ContainerAdministrator\AppData\Local\vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-windows --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
- name: Upload linux vcpkg cache to s3
continue-on-error: true
if: always() && runner.os == 'Linux'
run: |
aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }}
env:
AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}"
AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}"
AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}"
build-docker-and-test:
runs-on: ubuntu-24-04-docker
steps:
- name: Getting the repo
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Run Docker
if: github.event_name != 'schedule'
run: |
docker build \
--build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
--build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
--build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
--build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
-t menloltd/cortex:test -f docker/Dockerfile.cache .
docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
sleep 20
- name: Run Docker
if: github.event_name == 'schedule'
run: |
latest_prerelease=$(curl -s https://api.github.com/repos/cortexcpp/cortex.cpp/releases | jq -r '.[] | select(.prerelease == true) | .tag_name' | head -n 1)
echo "cortex.llamacpp latest release: $latest_prerelease"
docker build \
--build-arg REMOTE_CACHE_URL="${{ secrets.MINIO_ENDPOINT }}/vcpkg-cache" \
--build-arg MINIO_ENDPOINT_URL="${{ secrets.MINIO_ENDPOINT }}" \
--build-arg MINIO_ACCESS_KEY="${{ secrets.MINIO_ACCESS_KEY_ID }}" \
--build-arg MINIO_SECRET_KEY="${{ secrets.MINIO_SECRET_ACCESS_KEY }}" \
--build-arg CORTEX_CPP_VERSION="${latest_prerelease}" \
-t menloltd/cortex:test -f docker/Dockerfile.cache .
docker run -it -d -p 3928:39281 --name cortex menloltd/cortex:test
sleep 20
- name: use python
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Run e2e tests
run: |
cd engine
python -m pip install --upgrade pip
python -m pip install -r e2e-test/requirements.txt
pytest e2e-test/test_api_docker.py
- name: Run Docker
continue-on-error: true
if: always()
run: |
docker logs cortex
docker stop cortex
docker rm cortex
echo "y\n" | docker system prune -af