-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feat/unload-model-api' of https://github.com/thunhuanh/nitro into feat/unload-model-api
- Loading branch information
Showing 3 changed files with 229 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#!/bin/bash
#
# End-to-end smoke test for the nitro server on Linux and macOS.
#
# Starts the given nitro binary in the background, makes sure a test model is
# available at /tmp/testmodel (downloading it from the given URL when it is
# missing), then sends a loadModel request followed by a streaming
# chat_completion request to http://localhost:3928 and checks that both
# return HTTP 200.
#
# Arguments:
#   $1 - path to the nitro binary
#   $2 - URL of the model file to download when /tmp/testmodel is missing
# Files written:
#   /tmp/nitro.log      - stdout/stderr of the server
#   /tmp/response1.log  - body of the loadModel response
#   /tmp/response2.log  - body of the chat_completion response
# Exit status:
#   0 when both requests return 200, 1 otherwise.

## Example run command
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf

# Check for required arguments
if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <path_to_binary> <url_to_download>"
  exit 1
fi

BINARY_PATH=$1
DOWNLOAD_URL=$2

# -f: the logs of a previous run may not exist; that is not an error.
rm -f /tmp/response1.log /tmp/response2.log /tmp/nitro.log

# Start the binary file in the background and remember its process id
"$BINARY_PATH" > /tmp/nitro.log 2>&1 &
pid=$!

# Stop the server on every exit path (success, failed check, failed download).
# The trap does not call exit, so the script's own exit status is preserved.
cleanup() {
  kill "$pid" 2>/dev/null
}
trap cleanup EXIT

# Wait for a few seconds to let the server start
sleep 5

# Check liveness only after the grace period: a binary that crashes during
# start-up is still alive in the first instant after being launched.
if ! kill -0 "$pid" 2>/dev/null; then
  echo "nitro failed to start. Logs:"
  cat /tmp/nitro.log
  exit 1
fi

# Check if /tmp/testmodel exists, if not, download it.
# The download goes to a temporary name first so that an interrupted or
# failed transfer is never mistaken for a valid cached model on a later run.
if [[ ! -f "/tmp/testmodel" ]]; then
  if ! wget "$DOWNLOAD_URL" -O /tmp/testmodel.part; then
    echo "Failed to download test model from $DOWNLOAD_URL"
    rm -f /tmp/testmodel.part
    exit 1
  fi
  mv /tmp/testmodel.part /tmp/testmodel
fi

# Run the curl commands. The response body goes to the log file (-o) and the
# HTTP status code (-w) is what ends up in the variable.
response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \
  --header 'Content-Type: application/json' \
  --data '{
    "llama_model_path": "/tmp/testmodel",
    "ctx_len": 2048,
    "ngl": 32,
    "embedding": false
}' 2>&1)

response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
  --header 'Content-Type: application/json' \
  --header 'Accept: text/event-stream' \
  --header 'Access-Control-Allow-Origin: *' \
  --data '{
    "messages": [
        {"content": "Hello there", "role": "assistant"},
        {"content": "Write a long and sad story for me", "role": "user"}
    ],
    "stream": true,
    "model": "gpt-3.5-turbo",
    "max_tokens": 2048,
    "stop": ["hello"],
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7
}' 2>&1
)

# Compare the status codes as strings: an arithmetic -ne would choke on
# values such as "008" or on any unexpected non-numeric output.
error_occurred=0
if [[ "$response1" != "200" ]]; then
  echo "The first curl command failed with status code: $response1"
  cat /tmp/response1.log
  error_occurred=1
fi

if [[ "$response2" != "200" ]]; then
  echo "The second curl command failed with status code: $response2"
  cat /tmp/response2.log
  error_occurred=1
fi

if [[ "$error_occurred" -eq 1 ]]; then
  echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
  echo "Nitro Error Logs:"
  cat /tmp/nitro.log
  exit 1
fi

echo "----------------------"
echo "Log load model:"
cat /tmp/response1.log

echo "----------------------"
echo "Log run test:"
cat /tmp/response2.log

echo "Nitro test run successfully!"

# The server process is killed by the EXIT trap installed above.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
@echo off
rem ---------------------------------------------------------------------------
rem End-to-end smoke test for the nitro server on Windows.
rem
rem Starts the given nitro binary in the background, makes sure a test model
rem is available at %TEMP%\testmodel (downloading it from the given URL when
rem it is missing), then sends a loadModel request followed by a streaming
rem chat_completion request to http://localhost:3928 and checks that both
rem return HTTP 200.
rem
rem Arguments:
rem   %1 - path to the nitro binary
rem   %2 - URL of the model file to download when the test model is missing
rem Exit code:
rem   0 when both requests return 200, 1 otherwise.
rem ---------------------------------------------------------------------------

rem Keep every variable set below local to this script.
setlocal

rem Use the TEMP directory provided by the environment instead of overwriting
rem TEMP with a hard-coded C:\Users\... path.
set "WORK_DIR=%TEMP%"
set "MODEL_PATH=%WORK_DIR%\testmodel"

rem Check for required arguments
if "%~2"=="" (
    echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
    exit /b 1
)

set "BINARY_PATH=%~1"
set "DOWNLOAD_URL=%~2"

rem Clear values that may have been inherited from the caller's environment,
rem otherwise the "not defined" / status checks below could be fooled.
set "pid="
set "response1="
set "response2="

rem Extract file name plus extension; used to find the process in tasklist
for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"

echo BINARY_NAME=%BINARY_NAME%

del "%WORK_DIR%\response1.log" 2>nul
del "%WORK_DIR%\response2.log" 2>nul
del "%WORK_DIR%\nitro.log" 2>nul

rem Start the binary file
start /B "" "%BINARY_PATH%" > "%WORK_DIR%\nitro.log" 2>&1

rem Wait for a few seconds to let the server start
rem (six pings to localhost take roughly five seconds)
ping -n 6 127.0.0.1 > nul

rem Capture the PID of the started process by its image name.
rem If several processes share that image name, the last one listed wins.
for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
    set "pid=%%a"
)

echo pid=%pid%

if not defined pid (
    echo nitro failed to start. Logs:
    type "%WORK_DIR%\nitro.log"
    exit /b 1
)

rem Check if the test model exists, if not, download it
if not exist "%MODEL_PATH%" (
    bitsadmin.exe /transfer "DownloadTestModel" "%DOWNLOAD_URL%" "%MODEL_PATH%"
)

rem Without a model the requests below cannot succeed; stop early and do not
rem leave the server running.
if not exist "%MODEL_PATH%" (
    echo Failed to download the test model.
    taskkill /f /pid %pid%
    exit /b 1
)

rem Define JSON strings for curl data.
rem Backslashes in the model path are doubled so the path is valid inside JSON.
set "MODEL_PATH_STRING=%MODEL_PATH:\=\\%"
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2048,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"

rem Print the values of curl_data1 and curl_data2 for debugging
echo curl_data1=%curl_data1%
echo curl_data2=%curl_data2%

rem Run the curl commands and capture the status code.
rem The response body goes to responseN.log (-o); the HTTP status code written
rem by -w goes to responseN_code.log.
curl.exe -o "%WORK_DIR%\response1.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > "%WORK_DIR%\response1_code.log" 2>&1

curl.exe -o "%WORK_DIR%\response2.log" -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
--header "Content-Type: application/json" ^
--header "Accept: text/event-stream" ^
--header "Access-Control-Allow-Origin: *" ^
--data "%curl_data2%" > "%WORK_DIR%\response2_code.log" 2>&1

set "error_occurred=0"

rem Read the status codes from the log files.
rem usebackq is required so the quoted argument is read as a file name.
for /f "usebackq" %%a in ("%WORK_DIR%\response1_code.log") do set "response1=%%a"
for /f "usebackq" %%a in ("%WORK_DIR%\response2_code.log") do set "response2=%%a"

if "%response1%" neq "200" (
    echo The first curl command failed with status code: %response1%
    type "%WORK_DIR%\response1.log"
    set "error_occurred=1"
)

if "%response2%" neq "200" (
    echo The second curl command failed with status code: %response2%
    type "%WORK_DIR%\response2.log"
    set "error_occurred=1"
)

if "%error_occurred%"=="1" (
    echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
    echo Nitro Error Logs:
    type "%WORK_DIR%\nitro.log"
    taskkill /f /pid %pid%
    exit /b 1
)

echo ----------------------
echo Log load model:
type "%WORK_DIR%\response1.log"

echo ----------------------
echo Log run test:
type "%WORK_DIR%\response2.log"

echo Nitro test run successfully!

rem Kill the server process
taskkill /f /pid %pid%
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,13 +5,14 @@ on: | |
branches: | ||
- main | ||
tags: ['v*.*.*'] | ||
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] | ||
paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] | ||
pull_request: | ||
types: [opened, synchronize, reopened] | ||
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] | ||
paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu'] | ||
|
||
env: | ||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }} | ||
MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf | ||
|
||
jobs: | ||
create-draft-release: | ||
|
@@ -73,6 +74,9 @@ jobs: | |
mkdir -p nitro | ||
cp build/nitro nitro/ | ||
zip -r nitro.zip nitro | ||
# run e2e testing | ||
cd nitro | ||
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -118,6 +122,10 @@ jobs: | |
mkdir -p nitro | ||
cp build/nitro nitro/ | ||
zip -r nitro.zip nitro | ||
# run e2e testing | ||
cd nitro | ||
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -165,6 +173,10 @@ jobs: | |
cp llama.cpp/ggml-metal.metal nitro/ | ||
cp build/nitro nitro/ | ||
zip -r nitro.zip nitro | ||
# run e2e testing | ||
cd nitro | ||
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -210,6 +222,10 @@ jobs: | |
mkdir -p nitro | ||
cp build/nitro nitro/ | ||
zip -r nitro.zip nitro | ||
# run e2e testing | ||
cd nitro | ||
chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -279,6 +295,8 @@ jobs: | |
robocopy ext_libs .\build\Release libcrypto-3-x64.dll | ||
robocopy ext_libs .\build\Release libssl-3-x64.dll | ||
7z a nitro.zip .\build\Release\* | ||
cd .\build\Release | ||
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -287,7 +305,7 @@ jobs: | |
with: | ||
upload_url: ${{ needs.create-draft-release.outputs.upload_url }} | ||
asset_path: ./nitro.zip | ||
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}.zip | ||
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip | ||
asset_content_type: application/zip | ||
|
||
windows-amd64-cuda-build: | ||
|
@@ -339,6 +357,8 @@ jobs: | |
robocopy ext_libs .\build\Release libcrypto-3-x64.dll | ||
robocopy ext_libs .\build\Release libssl-3-x64.dll | ||
7z a nitro.zip .\build\Release\* | ||
cd .\build\Release | ||
..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }} | ||
- uses: actions/[email protected] | ||
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') | ||
|
@@ -347,7 +367,7 @@ jobs: | |
with: | ||
upload_url: ${{ needs.create-draft-release.outputs.upload_url }} | ||
asset_path: ./nitro.zip | ||
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}-cu${{ matrix.cuda }}.zip | ||
asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip | ||
asset_content_type: application/zip | ||
|
||
update_release_draft: | ||
|