diff --git a/.github/scripts/e2e-test-linux-and-mac.sh b/.github/scripts/e2e-test-linux-and-mac.sh
new file mode 100644
index 000000000..b8855ba8a
--- /dev/null
+++ b/.github/scripts/e2e-test-linux-and-mac.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+## Example run command
+# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+
+# Check for required arguments
+if [[ $# -ne 2 ]]; then
+    echo "Usage: $0 <path_to_binary> <model_download_url>"
+    exit 1
+fi
+
+rm -f /tmp/response1.log /tmp/response2.log /tmp/nitro.log
+
+BINARY_PATH=$1
+DOWNLOAD_URL=$2
+
+# Start the binary file
+"$BINARY_PATH" > /tmp/nitro.log 2>&1 &
+
+# Get the process id of the binary file
+pid=$!
+
+if ! ps -p $pid > /dev/null; then
+    echo "nitro failed to start. Logs:"
+    cat /tmp/nitro.log
+    exit 1
+fi
+
+# Wait for a few seconds to let the server start
+sleep 5
+
+
+
+# Check if /tmp/testmodel exists, if not, download it
+if [[ ! -f "/tmp/testmodel" ]]; then
+    wget "$DOWNLOAD_URL" -O /tmp/testmodel
+fi
+
+# Run the curl commands
+response1=$(curl -o /tmp/response1.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/loadModel' \
+--header 'Content-Type: application/json' \
+--data '{
+    "llama_model_path": "/tmp/testmodel",
+    "ctx_len": 2048,
+    "ngl": 32,
+    "embedding": false
+}' 2>&1)
+
+response2=$(curl -o /tmp/response2.log -s -w "%{http_code}" --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
+--header 'Content-Type: application/json' \
+--header 'Accept: text/event-stream' \
+--header 'Access-Control-Allow-Origin: *' \
+--data '{
+    "messages": [
+        {"content": "Hello there", "role": "assistant"},
+        {"content": "Write a long and sad story for me", "role": "user"}
+    ],
+    "stream": true,
+    "model": "gpt-3.5-turbo",
+    "max_tokens": 2048,
+    "stop": ["hello"],
+    "frequency_penalty": 0,
+    "presence_penalty": 0,
+    "temperature": 0.7
+  }' 2>&1
+)
+
+error_occurred=0
+if [[ "$response1" -ne 200 ]]; then
+    echo "The first curl command failed with status code: $response1"
+    cat /tmp/response1.log
+    error_occurred=1
+fi
+
+if [[ "$response2" -ne 200 ]]; then
+    echo "The second curl command failed with status code: $response2"
+    cat /tmp/response2.log
+    error_occurred=1
+fi
+
+if [[ "$error_occurred" -eq 1 ]]; then
+    echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!"
+    echo "Nitro Error Logs:"
+    cat /tmp/nitro.log
+    kill $pid
+    exit 1
+fi
+
+echo "----------------------"
+echo "Log load model:"
+cat /tmp/response1.log
+
+echo "----------------------"
+echo "Log run test:"
+cat /tmp/response2.log
+
+
+echo "Nitro test run successfully!"
+
+# Kill the server process
+kill $pid
\ No newline at end of file
diff --git a/.github/scripts/e2e-test-windows.bat b/.github/scripts/e2e-test-windows.bat
new file mode 100644
index 000000000..63c45a50e
--- /dev/null
+++ b/.github/scripts/e2e-test-windows.bat
@@ -0,0 +1,104 @@
+@echo off
+
+set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
+set "MODEL_PATH=%TEMP%\testmodel"
+
+rem Check for required arguments
+if "%~2"=="" (
+    echo Usage: %~0 ^<path_to_binary^> ^<model_download_url^>
+    exit /b 1
+)
+
+set "BINARY_PATH=%~1"
+set "DOWNLOAD_URL=%~2"
+
+for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"
+
+echo BINARY_NAME=%BINARY_NAME%
+
+del %TEMP%\response1.log 2>nul
+del %TEMP%\response2.log 2>nul
+del %TEMP%\nitro.log 2>nul
+
+rem Start the binary file
+start /B "" "%BINARY_PATH%" > %TEMP%\nitro.log 2>&1
+
+ping -n 6 127.0.0.1 > nul
+
+rem Capture the PID of the started process with "nitro" in its name
+for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do (
+    set "pid=%%a"
+)
+
+echo pid=%pid%
+
+if not defined pid (
+    echo nitro failed to start. Logs:
+    type %TEMP%\nitro.log
+    exit /b 1
+)
+
+rem Wait for a few seconds to let the server start
+
+rem Check if %TEMP%\testmodel exists, if not, download it
+if not exist "%MODEL_PATH%" (
+    bitsadmin.exe /transfer "DownloadTestModel" %DOWNLOAD_URL% "%MODEL_PATH%"
+)
+
+rem Define JSON strings for curl data
+call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
+set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
+set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":2048,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"
+
+rem Print the values of curl_data1 and curl_data2 for debugging
+echo curl_data1=%curl_data1%
+echo curl_data2=%curl_data2%
+
+rem Run the curl commands and capture the status code
+curl.exe -o %TEMP%\response1.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1_code.log 2>&1
+
+curl.exe -o %TEMP%\response2.log -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
+--header "Content-Type: application/json" ^
+--header "Accept: text/event-stream" ^
+--header "Access-Control-Allow-Origin: *" ^
+--data "%curl_data2%" > %TEMP%\response2_code.log 2>&1
+
+set "error_occurred=0"
+
+rem Read the status codes from the log files
+for /f %%a in (%TEMP%\response1_code.log) do set "response1=%%a"
+for /f %%a in (%TEMP%\response2_code.log) do set "response2=%%a"
+
+if "%response1%" neq "200" (
+    echo The first curl command failed with status code: %response1%
+    type %TEMP%\response1.log
+    set "error_occurred=1"
+)
+
+if "%response2%" neq "200" (
+    echo The second curl command failed with status code: %response2%
+    type %TEMP%\response2.log
+    set "error_occurred=1"
+)
+
+if "%error_occurred%"=="1" (
+    echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
+    echo Nitro Error Logs:
+    type %TEMP%\nitro.log
+    taskkill /f /pid %pid%
+    exit /b 1
+)
+
+
+echo ----------------------
+echo Log load model:
+type %TEMP%\response1.log
+
+echo ----------------------
+echo "Log run test:"
+type %TEMP%\response2.log
+
+echo Nitro test run successfully!
+
+rem Kill the server process
+taskkill /f /pid %pid%
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 006bae70a..bcea90e01 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,13 +5,14 @@ on:
     branches:
       - main
     tags: ['v*.*.*']
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+    paths: ['.github/scripts/**','.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
 
 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+  MODEL_URL: https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
 
 jobs:
   create-draft-release:
@@ -72,6 +73,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -117,6 +124,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -164,6 +177,12 @@ jobs:
           mkdir -p nitro
           cp llama.cpp/ggml-metal.metal nitro/
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -209,6 +228,12 @@ jobs:
         run: |
           mkdir -p nitro
           cp build/nitro nitro/
+
+          # run e2e testing
+          cd nitro
+          chmod +x ../.github/scripts/e2e-test-linux-and-mac.sh && ../.github/scripts/e2e-test-linux-and-mac.sh ./nitro ${{ env.MODEL_URL }}
+          cd ..
+
           zip -r nitro.zip nitro
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -278,6 +303,11 @@ jobs:
           robocopy build\bin\Release .\build\Release llama.dll
           robocopy ext_libs .\build\Release libcrypto-3-x64.dll
           robocopy ext_libs .\build\Release libssl-3-x64.dll
+
+          cd .\build\Release
+          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
+          cd ..\..
+
           7z a nitro.zip .\build\Release\*
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -287,7 +317,7 @@
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./nitro.zip
-          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}.zip
+          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64.zip
           asset_content_type: application/zip
 
   windows-amd64-cuda-build:
@@ -338,6 +368,11 @@ jobs:
           robocopy build\bin\Release .\build\Release llama.dll
           robocopy ext_libs .\build\Release libcrypto-3-x64.dll
           robocopy ext_libs .\build\Release libssl-3-x64.dll
+
+          cd .\build\Release
+          ..\..\.github\scripts\e2e-test-windows.bat .\nitro.exe ${{ env.MODEL_URL }}
+          cd ..\..
+
           7z a nitro.zip .\build\Release\*
 
       - uses: actions/upload-release-asset@v1.0.1
@@ -347,7 +382,7 @@
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./nitro.zip
-          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-${{ matrix.build }}-cu${{ matrix.cuda }}.zip
+          asset_name: nitro-${{ needs.create-draft-release.outputs.version }}-win-amd64-cuda.zip
           asset_content_type: application/zip
 
   update_release_draft: