feat: python runtime engine #559

Merged · 18 commits · May 28, 2024
99 changes: 99 additions & 0 deletions .github/scripts/e2e-test-python-linux-and-mac.sh
@@ -0,0 +1,99 @@
#!/bin/bash

## Example run command
# ./e2e-test-python-linux-and-mac.sh '../../examples/build/server' './e2e-test.py'

# Check for required arguments
if [[ $# -ne 2 ]]; then
    echo "Usage: $0 <path_to_binary> <path_to_python_file>"
    exit 1
fi

BINARY_PATH=$1
PYTHON_FILE_EXECUTION_PATH=$2

# Remove logs from previous runs (-f: ignore missing files)
rm -f /tmp/python-file-execution-res.log /tmp/server.log

# Random port to ensure it's not used
min=10000
max=11000
range=$((max - min + 1))
PORT=$((RANDOM % range + min))

# Install numpy for Python
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
export DYLD_FALLBACK_LIBRARY_PATH="$PYTHONHOME:$DYLD_FALLBACK_LIBRARY_PATH"
echo "Set Python HOME to $PYTHONHOME"
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 -m ensurepip
./engines/cortex.python/python/bin/python3 -m pip install --upgrade pip
./engines/cortex.python/python/bin/python3 -m pip install numpy --target=$PYTHONHOME/lib/python/site-packages/

# Start the binary file
"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/server.log &

pid=$!

if ! ps -p $pid >/dev/null; then
    echo "server failed to start. Logs:"
    cat /tmp/server.log
    exit 1
fi

# Wait for a few seconds to let the server start
sleep 3

# Run the curl commands
response1=$(curl --connect-timeout 60 -o /tmp/python-file-execution-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/fine_tuning/job" \
    --header 'Content-Type: application/json' \
    --data '{
        "file_execution_path": "'$PYTHON_FILE_EXECUTION_PATH'"
    }')

error_occurred=0

# Verify the response
if [[ "$response1" -ne 200 ]]; then
echo "The python file execution curl command failed with status code: $response1"
cat /tmp/python-file-execution-res.log
error_occurred=1
fi

# Verify the output of the Python file in output.txt
OUTPUT_FILE="./output.txt"
EXPECTED_OUTPUT="1 2 3" # Content the Python test file is expected to write

if [[ -f "$OUTPUT_FILE" ]]; then
    actual_output=$(cat "$OUTPUT_FILE")
    if [[ "$actual_output" != "$EXPECTED_OUTPUT" ]]; then
        echo "The output of the Python file does not match the expected output."
        echo "Expected: $EXPECTED_OUTPUT"
        echo "Actual: $actual_output"
        error_occurred=1
    else
        echo "The output of the Python file matches the expected output."
    fi
else
    echo "Output file $OUTPUT_FILE does not exist."
    error_occurred=1
fi


if [[ "$error_occurred" -eq 1 ]]; then
echo "Server test run failed!!!!!!!!!!!!!!!!!!!!!!"
echo "Server Error Logs:"
cat /tmp/server.log
kill $pid
echo "An error occurred while running the server."
exit 1
fi

echo "----------------------"
echo "Log server:"
cat /tmp/server.log

echo "Server test run successfully!"

# Kill the server process
kill $pid
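
For reference, the request this script drives can be reproduced by hand against a running server. A minimal sketch; the port and file path below are illustrative, not taken from this PR:

# Manual smoke test of the endpoint exercised above, assuming the server
# was started as: ./server 1 127.0.0.1 3928
curl --location "http://127.0.0.1:3928/v1/fine_tuning/job" \
    --header 'Content-Type: application/json' \
    --data '{"file_execution_path": "./python-file-to-test.py"}'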
119 changes: 119 additions & 0 deletions .github/scripts/e2e-test-python-windows.bat
@@ -0,0 +1,119 @@
@echo off
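
rem Example run command (paths are illustrative, mirroring the Linux/macOS script)
rem .\e2e-test-python-windows.bat "..\..\examples\build\server.exe" ".\python-file-to-test.py"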

setlocal enabledelayedexpansion

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"

rem Check for required arguments
if "%~2"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<path_to_python_file^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "PYTHON_FILE_EXECUTION_PATH=%~2"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

rem Remove logs from previous runs
del %TEMP%\response1.log 2>nul
del %TEMP%\response1_code.log 2>nul
del %TEMP%\server.log 2>nul

set /a min=9999
set /a max=11000
set /a range=max-min+1
set /a PORT=%min% + %RANDOM% %% %range%

rem Install numpy for Python
set "PYTHONHOME=%cd%\engines\cortex.python\python"
echo Set Python HOME to %PYTHONHOME%
%PYTHONHOME%\python.exe -m ensurepip
%PYTHONHOME%\python.exe -m pip install --upgrade pip
%PYTHONHOME%\python.exe -m pip install numpy --target=%PYTHONHOME%\Lib\site-packages\

rem Start the binary file
start "" /B "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > "%TEMP%\server.log" 2>&1

rem Wait a few seconds to let the server start
ping -n 3 127.0.0.1 > nul

rem Capture the PID of the started process with "server" in its name
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
    echo server failed to start. Logs:
    type %TEMP%\server.log
    echo.
    exit /b 1
)

rem Define JSON strings for curl data
call set "PYTHON_FILE_EXECUTION_PATH_STRING=%%PYTHON_FILE_EXECUTION_PATH:\=\\%%"
set "curl_data1={\"file_execution_path\":\"%PYTHON_FILE_EXECUTION_PATH_STRING%\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%

rem Run the curl command: the response body goes to response1.log via -o, and the
rem -w status code goes to a separate file so the two cannot overwrite each other
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/fine_tuning/job" --header "Content-Type: application/json" --data "%curl_data1%" > "%TEMP%\response1_code.log" 2>&1

set "error_occurred=0"

rem Read the status code from the status file
set "response1="
for /f %%a in (%TEMP%\response1_code.log) do set "response1=%%a"

if "%response1%" neq "200" (
    echo The python file execution curl command failed with status code: %response1%
    type %TEMP%\response1.log
    echo.
    set "error_occurred=1"
)

echo ----------------------
echo Log python file execution:
type %TEMP%\response1.log
echo.

rem Verification step: Check the contents of output.txt
set "expected_output=1 2 3"
set "actual_output="
if exist "output.txt" (
for /f "delims=" %%x in (output.txt) do set "actual_output=%%x"
if "!actual_output!"=="!expected_output!" (
echo Verification succeeded: output.txt contains the expected data.
) else (
echo Verification failed: output.txt does not contain the expected data.
echo Expected: !expected_output!
echo Actual: !actual_output!
set "error_occurred=1"
)
) else (
echo Verification failed: output.txt does not exist.
set "error_occurred=1"
)

echo ----------------------
echo Server logs:
type %TEMP%\server.log
echo.

if "%error_occurred%"=="1" (
echo Server test run failed!!!!!!!!!!!!!!!!!!!!!!
taskkill /f /pid %pid%
echo An error occurred while running the server.
exit /b 1
)

echo Server test ran successfully!

rem Kill the server process
taskkill /f /im server.exe 2>nul || exit /B 0

endlocal
9 changes: 9 additions & 0 deletions .github/scripts/python-file-to-test.py
@@ -0,0 +1,9 @@
import sys

# Print the module search path to verify the embedded runtime is being used
for path in sys.path:
    print(path)

import numpy as np
print("Numpy version: " + np.__version__)

# Write "1 2 3" to output.txt; the e2e scripts verify this content
with open('output.txt', 'w') as file:
    file.write(' '.join(map(str, np.array([1, 2, 3]))))
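
To sanity-check this file against the bundled runtime without going through the server, a manual run might look like the sketch below; the environment variables mirror the Linux/macOS e2e script, and the paths are assumptions:

# Run the test file directly under the engine's bundled Python,
# using the same environment the e2e script sets up.
export PYTHONHOME=$(pwd)/engines/cortex.python/python/
export LD_LIBRARY_PATH="$PYTHONHOME:$LD_LIBRARY_PATH"
./engines/cortex.python/python/bin/python3 python-file-to-test.py
cat output.txt   # expected: 1 2 3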
26 changes: 26 additions & 0 deletions .github/workflows/cortex-cpp-quality-gate.yml
@@ -12,6 +12,7 @@ on:
env:
  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
  PYTHON_FILE_EXECUTION_PATH: "python-file-to-test.py"

jobs:
  build-and-test:
@@ -26,107 +27,126 @@ jobs:
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF"
            run-e2e: true
            run-python-e2e: true

          - os: "linux"
            name: "amd64-avx"
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
            run-e2e: false
            run-python-e2e: false

          - os: "linux"
            name: "amd64-avx512"
            runs-on: "ubuntu-18-04"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
            run-e2e: false
            run-python-e2e: false

          - os: "linux"
            name: "amd64-vulkan"
            runs-on: "ubuntu-18-04-cuda-11-7"
            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
            run-e2e: false
            run-python-e2e: false

          - os: "linux"
            name: "amd64-cuda-11-7"
            runs-on: "ubuntu-18-04-cuda-11-7"
            cmake-flags: "-DCUDA_11_7=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
            run-e2e: false
            run-python-e2e: false

          - os: "linux"
            name: "amd64-cuda-12-0"
            runs-on: "ubuntu-18-04-cuda-12-0"
            cmake-flags: "-DCUDA_12_0=ON -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
            run-e2e: false
            run-python-e2e: false

          - os: "mac"
            name: "amd64"
            runs-on: "macos-13"
            cmake-flags: ""
            run-e2e: true
            run-python-e2e: true

          - os: "mac"
            name: "arm64"
            runs-on: "mac-silicon"
            cmake-flags: "-DMAC_ARM64=ON"
            run-e2e: true
            run-python-e2e: true

          - os: "windows"
            name: "amd64-avx2"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: true
            run-python-e2e: true

          - os: "windows"
            name: "amd64-avx"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx512"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-vulkan"
            runs-on: "windows-latest"
            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx2-cuda-12-0"
            runs-on: "windows-cuda-12-0"
            cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx-cuda-12-0"
            runs-on: "windows-cuda-12-0"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx512-cuda-12-0"
            runs-on: "windows-cuda-12-0"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_12_0=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx2-cuda-11-7"
            runs-on: "windows-cuda-11-7"
            cmake-flags: "-DLLAMA_AVX2=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx-cuda-11-7"
            runs-on: "windows-cuda-11-7"
            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

          - os: "windows"
            name: "amd64-avx512-cuda-11-7"
            runs-on: "windows-cuda-11-7"
            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DCUDA_11_7=ON -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
            run-e2e: false
            run-python-e2e: false

    steps:
      - name: Clone
@@ -161,6 +181,12 @@ jobs:
          cd cortex-cpp
          make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}

      - name: Run python e2e testing
        if: ${{ matrix.run-python-e2e }}
        run: |
          cd cortex-cpp
          make run-python-e2e-test RUN_TESTS=true PYTHON_FILE_EXECUTION_PATH=${{ env.PYTHON_FILE_EXECUTION_PATH }}

      - name: Upload Artifact
        uses: actions/upload-artifact@v2
        with:
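The run-python-e2e-test make target itself is not part of this diff; presumably it dispatches to the platform scripts above. A hedged manual equivalent on Linux/macOS, with the binary path as an assumption:

# Hypothetical equivalent of `make run-python-e2e-test`; the actual
# Makefile target is not shown in this PR.
cd cortex-cpp
bash ../.github/scripts/e2e-test-python-linux-and-mac.sh \
    ./build/server ../.github/scripts/python-file-to-test.py
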
3 changes: 3 additions & 0 deletions cortex-cpp/CMakeLists.txt
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.5)
project(cortex-cpp C CXX)

include(engines/cortex.llamacpp/engine.cmake)
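# The Python engine is only pulled in for non-CUDA AVX2 or Apple builds,
# matching the matrix entries that enable run-python-e2e in the quality-gate workflow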
if(NOT LLAMA_CUDA AND (LLAMA_AVX2 OR APPLE))
  include(engines/cortex.python/engine.cmake)
endif()
include(CheckIncludeFileCXX)

check_include_file_cxx(any HAS_ANY)