Skip to content

Commit

Permalink
feat: e2e embedding endpoint scripts (#511)
Browse files Browse the repository at this point in the history
  • Loading branch information
vansangpfiev authored Apr 16, 2024
1 parent 2c33000 commit d820e06
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 32 deletions.
93 changes: 80 additions & 13 deletions .github/scripts/e2e-test-llama-linux-and-mac.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Check for required arguments
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download>"
if [[ $# -ne 3 ]]; then
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
exit 1
fi

rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log
rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log

BINARY_PATH=$1
DOWNLOAD_URL=$2
DOWNLOAD_LLM_URL=$2
DOWNLOAD_EMBEDDING_URL=$3

# Random port to ensure it's not used
min=10000
Expand All @@ -37,11 +38,16 @@ sleep 5

# Check if /tmp/testllm exists, if not, download it
if [[ ! -f "/tmp/testllm" ]]; then
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm
curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm
fi

# Check if /tmp/test-embedding exists, if not, download it
if [[ ! -f "/tmp/test-embedding" ]]; then
curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding
fi

# Run the curl commands
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm",
Expand All @@ -57,7 +63,7 @@ if ! ps -p $pid >/dev/null; then
fi

response2=$(
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
Expand All @@ -76,16 +82,65 @@ response2=$(
}'
)

# unload model
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/testllm"
}')

# load embedding model
response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
--header 'Content-Type: application/json' \
--data '{
"llama_model_path": "/tmp/test-embedding",
"ctx_len": 50,
"ngl": 32,
"embedding": true,
"model_type": "embedding"
}')

# request embedding
response5=$(
curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \
--header 'Content-Type: application/json' \
--header 'Accept: text/event-stream' \
--header 'Access-Control-Allow-Origin: *' \
--data '{
"input": "Hello",
"model": "test-embedding",
"encoding_format": "float"
}'
)

error_occurred=0
if [[ "$response1" -ne 200 ]]; then
echo "The first curl command failed with status code: $response1"
cat /tmp/response1.log
echo "The load llm model curl command failed with status code: $response1"
cat /tmp/load-llm-model-res.log
error_occurred=1
fi

if [[ "$response2" -ne 200 ]]; then
echo "The second curl command failed with status code: $response2"
cat /tmp/response2.log
echo "The completion curl command failed with status code: $response2"
cat /tmp/completion-res.log
error_occurred=1
fi

if [[ "$response3" -ne 200 ]]; then
echo "The unload model curl command failed with status code: $response3"
cat /tmp/unload-model-res.log
error_occurred=1
fi

if [[ "$response4" -ne 200 ]]; then
echo "The load embedding model curl command failed with status code: $response4"
cat /tmp/load-embedding-model-res.log
error_occurred=1
fi

if [[ "$response5" -ne 200 ]]; then
echo "The embedding curl command failed with status code: $response5"
cat /tmp/embedding-res.log
error_occurred=1
fi

Expand All @@ -99,11 +154,23 @@ fi

echo "----------------------"
echo "Log load model:"
cat /tmp/response1.log
cat /tmp/load-llm-model-res.log

echo "----------------------"
echo "Log run test:"
cat /tmp/completion-res.log

echo "----------------------"
echo "Log unload model:"
cat /tmp/unload-model-res.log

echo "----------------------"
echo "Log load embedding model:"
cat /tmp/load-embedding-model-res.log

echo "----------------------"
echo "Log run embedding test:"
cat /tmp/response2.log
cat /tmp/embedding-res.log

echo "Nitro test run successfully!"

Expand Down
85 changes: 70 additions & 15 deletions .github/scripts/e2e-test-llama-windows.bat
Original file line number Diff line number Diff line change
@@ -1,23 +1,28 @@
@echo off

set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
set "MODEL_PATH=%TEMP%\testllm"
set "MODEL_LLM_PATH=%TEMP%\testllm"
set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding"

rem Check for required arguments
if "%~2"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
if "%~3"=="" (
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download_llm^> ^<url_to_download_embedding^>
exit /b 1
)

set "BINARY_PATH=%~1"
set "DOWNLOAD_URL=%~2"
set "DOWNLOAD_LLM_URL=%~2"
set "DOWNLOAD_EMBEDDING_URL=%~3"

for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del %TEMP%\response1.log 2>nul
del %TEMP%\response2.log 2>nul
del %TEMP%\response3.log 2>nul
del %TEMP%\response4.log 2>nul
del %TEMP%\response5.log 2>nul
del %TEMP%\nitro.log 2>nul

set /a min=9999
Expand Down Expand Up @@ -46,33 +51,53 @@ if not defined pid (
rem Wait for a few seconds to let the server start

rem Check if %TEMP%\testmodel exists, if not, download it
if not exist "%MODEL_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
if not exist "%MODEL_LLM_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%"
)

rem Define JSON strings for curl data
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
if not exist "%MODEL_EMBEDDING_PATH%" (
curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%"
)

rem Print the values of curl_data1 and curl_data2 for debugging
rem Define JSON strings for curl data
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"

rem Print the values of curl_data for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%
echo curl_data3=%curl_data3%
echo curl_data4=%curl_data4%
echo curl_data5=%curl_data5%

rem Run the curl commands and capture the status code
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
--header "Content-Type: application/json" ^
--header "Accept: text/event-stream" ^
--header "Access-Control-Allow-Origin: *" ^
--data "%curl_data2%" > %TEMP%\response2.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1

curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
--header "Content-Type: application/json" ^
--data "%curl_data5%" > %TEMP%\response5.log 2>&1

set "error_occurred=0"

rem Read the status codes from the log files
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"

if "%response1%" neq "200" (
echo The first curl command failed with status code: %response1%
Expand All @@ -86,6 +111,24 @@ if "%response2%" neq "200" (
set "error_occurred=1"
)

if "%response3%" neq "200" (
echo The third curl command failed with status code: %response3%
type %TEMP%\response3.log
set "error_occurred=1"
)

if "%response4%" neq "200" (
echo The fourth curl command failed with status code: %response4%
type %TEMP%\response4.log
set "error_occurred=1"
)

if "%response5%" neq "200" (
echo The fifth curl command failed with status code: %response5%
type %TEMP%\response5.log
set "error_occurred=1"
)

if "%error_occurred%"=="1" (
echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
echo Nitro Error Logs:
Expand All @@ -96,13 +139,25 @@ if "%error_occurred%"=="1" (


echo ----------------------
echo Log load model:
echo Log load llm model:
type %TEMP%\response1.log

echo ----------------------
echo "Log run test:"
echo Log run test:
type %TEMP%\response2.log

echo ----------------------
echo Log unload model:
type %TEMP%\response3.log

echo ----------------------
echo Log load embedding model:
type %TEMP%\response4.log

echo ----------------------
echo Log run embedding test:
type %TEMP%\response5.log

echo Nitro test run successfully!

rem Kill the server process
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf

jobs:
create-draft-release:
Expand Down Expand Up @@ -186,7 +187,7 @@ jobs:
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/
- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -307,7 +308,7 @@ jobs:
run: |
# run e2e testing
cd nitro/
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/
- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -373,7 +374,7 @@ jobs:
run: |
# run e2e testing
cd nitro
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rm -rf uploads/
- name: Run e2e testing - Whisper.CPP
Expand Down Expand Up @@ -519,7 +520,7 @@ jobs:
if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
run: |
cd build\Release
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
rmdir /S /Q .\build\Release\uploads
- name: Run e2e testing - Whisper.cpp
Expand Down

0 comments on commit d820e06

Please sign in to comment.