diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh index 5b7b9771d..f235815e8 100644 --- a/.github/scripts/e2e-test-llama-linux-and-mac.sh +++ b/.github/scripts/e2e-test-llama-linux-and-mac.sh @@ -1,7 +1,7 @@ #!/bin/bash ## Example run command -# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf +# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf # Check for required arguments if [[ $# -ne 3 ]]; then @@ -9,7 +9,7 @@ if [[ $# -ne 3 ]]; then exit 1 fi -rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log +rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log BINARY_PATH=$1 DOWNLOAD_LLM_URL=$2 @@ -22,14 +22,14 @@ range=$((max - min + 1)) PORT=$((RANDOM % range + min)) # Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & +"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log & # Get the process id of the binary file pid=$! if ! ps -p $pid >/dev/null; then - echo "nitro failed to start. Logs:" - cat /tmp/nitro.log + echo "cortex-cpp failed to start. Logs:" + cat /tmp/cortex-cpp.log exit 1 fi @@ -47,7 +47,7 @@ if [[ ! -f "/tmp/test-embedding" ]]; then fi # Run the curl commands -response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm", @@ -57,8 +57,8 @@ response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{ht }') if ! ps -p $pid >/dev/null; then - echo "nitro failed to load model. Logs:" - cat /tmp/nitro.log + echo "cortex-cpp failed to load model. Logs:" + cat /tmp/cortex-cpp.log exit 1 fi @@ -83,14 +83,14 @@ response2=$( ) # unload model -response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \ +response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm" }') # load embedding model -response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/test-embedding", @@ -145,9 +145,9 @@ if [[ "$response5" -ne 200 ]]; then fi if [[ "$error_occurred" -eq 1 ]]; then - echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!" 
- echo "Nitro Error Logs:" - cat /tmp/nitro.log + echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!" + echo "cortex-cpp Error Logs:" + cat /tmp/cortex-cpp.log kill $pid exit 1 fi @@ -172,7 +172,7 @@ echo "----------------------" echo "Log run test:" cat /tmp/embedding-res.log -echo "Nitro test run successfully!" +echo "cortex-cpp test run successfully!" # Kill the server process kill $pid diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index cddca1e0b..b11e38bb1 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -23,7 +23,7 @@ del %TEMP%\response2.log 2>nul del %TEMP%\response3.log 2>nul del %TEMP%\response4.log 2>nul del %TEMP%\response5.log 2>nul -del %TEMP%\nitro.log 2>nul +del %TEMP%\cortex-cpp.log 2>nul set /a min=9999 set /a max=11000 @@ -31,11 +31,11 @@ set /a range=max-min+1 set /a PORT=%min% + %RANDOM% %% %range% rem Start the binary file -start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 +start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1 ping -n 6 127.0.0.1 %PORT% > nul -rem Capture the PID of the started process with "nitro" in its name +rem Capture the PID of the started process with "cortex-cpp" in its name for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( set "pid=%%a" ) @@ -43,8 +43,8 @@ for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| echo pid=%pid% if not defined pid ( - echo nitro failed to start. Logs: - type %TEMP%\nitro.log + echo cortex-cpp failed to start. Logs: + type %TEMP%\cortex-cpp.log exit /b 1 ) @@ -76,15 +76,15 @@ echo curl_data4=%curl_data4% echo curl_data5=%curl_data5% rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^ --header "Content-Type: application/json" ^ --data "%curl_data2%" > %TEMP%\response2.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w 
"%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ --header "Content-Type: application/json" ^ @@ -130,9 +130,9 @@ if "%response5%" neq "200" ( ) if "%error_occurred%"=="1" ( - echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!! - echo Nitro Error Logs: - type %TEMP%\nitro.log + echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!! + echo cortex-cpp Error Logs: + type %TEMP%\cortex-cpp.log taskkill /f /pid %pid% exit /b 1 ) @@ -158,8 +158,8 @@ echo ---------------------- echo Log run embedding test: type %TEMP%\response5.log -echo Nitro test run successfully! +echo cortex-cpp test run successfully! rem Kill the server process @REM taskkill /f /pid %pid% -taskkill /f /im nitro.exe 2>nul || exit /B 0 \ No newline at end of file +taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 356a2a9ca..716dfd679 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -185,8 +185,8 @@ jobs: - name: Upload Artifact uses: actions/upload-artifact@v2 with: - name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }} - path: ./cortex-cpp/cortex + name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex-cpp/cortex-cpp - uses: actions/upload-release-asset@v1.0.1 if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') @@ -194,6 +194,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-cpp/cortex.tar.gz - asset_name: cortex-llamacpp-engine-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz + asset_path: ./cortex-cpp/cortex-cpp.tar.gz + asset_name: cortex-cpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz asset_content_type: application/gzip \ No newline at end of file diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml index 82930e0ed..909ab7e77 100644 --- a/.github/workflows/quality-gate.yml +++ b/.github/workflows/quality-gate.yml @@ -159,5 +159,5 @@ jobs: - name: Upload Artifact uses: actions/upload-artifact@v2 with: - name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }} - path: ./cortex-cpp/cortex \ No newline at end of file + name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex-cpp/cortex-cpp \ No newline at end of file diff --git a/.github/workflows/update-release-url.yml b/.github/workflows/update-release-url.yml deleted file mode 100644 index 710d23ba7..000000000 --- a/.github/workflows/update-release-url.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Update Download URLs - -on: - release: - types: - - published - - workflow_dispatch: - -jobs: - update-readme: - runs-on: ubuntu-latest - environment: production - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: "0" - token: ${{ secrets.PAT_SERVICE_ACCOUNT }} - ref: main - - - name: Get Latest Release - uses: pozetroninc/github-action-get-latest-release@v0.7.0 - id: get-latest-release - with: - repository: ${{ github.repository }} - - - name: Update Download URLs in README.md - run: | - echo "Latest Release: ${{ steps.get-latest-release.outputs.release }}" - tag=$(/bin/echo -n "${{ 
steps.get-latest-release.outputs.release }}") - echo "Tag: $tag" - # Remove the v prefix - release=${tag:1} - echo "Release: $release" - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - - - name: Commit and Push changes - if: github.event_name == 'release' - run: | - git config --global user.email "service@jan.ai" - git config --global user.name "Service Account" - git add README.md - git commit -m "Update README.md with Stable Download URLs" - git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main \ No newline at end of file diff --git a/cortex-cpp/.gitignore b/cortex-cpp/.gitignore index 69c167305..10d117410 100644 --- a/cortex-cpp/.gitignore +++ b/cortex-cpp/.gitignore @@ -85,7 +85,6 @@ CMakeCache.txt CMakeFiles CMakeScripts Testing -!nitro-node/Makefile cmake_install.cmake install_manifest.txt compile_commands.json @@ -561,7 +560,7 @@ FodyWeavers.xsd # End of https://www.toptal.com/developers/gitignore/api/intellij+all,visualstudio,visualstudiocode,cmake,c,c++ build -build_deps +build-deps .DS_Store uploads/** \ No newline at end of file diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index 97be0e86d..8c01d2256 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.5) -project(nitro C CXX) +project(cortex-cpp C CXX) include(engines/cortex.llamacpp/engine.cmake) include(CheckIncludeFileCXX) @@ -21,7 +21,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(OPENSSL_USE_STATIC_LIBS TRUE) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install) +set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install) # This is the critical line for installing another package if(LLAMA_CUDA) @@ -35,12 +35,12 @@ if(LLAMA_CUDA) endif() if(DEBUG) - message(STATUS "NITRO DEBUG IS ON") + message(STATUS "CORTEX-CPP DEBUG IS ON") add_compile_definitions(ALLOW_ALL_CORS) endif() -if(NOT DEFINED NITRO_VERSION) - set(NITRO_VERSION "default_version") +if(NOT DEFINED CORTEX_CPP_VERSION) + set(CORTEX_CPP_VERSION "default_version") endif() if(APPLE) @@ -54,7 +54,7 @@ if(APPLE) endif() endif() -add_compile_definitions(NITRO_VERSION="${NITRO_VERSION}") +add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") add_subdirectory(test) @@ -62,8 +62,8 @@ add_executable(${PROJECT_NAME} main.cc) # ############################################################################## # If you include the drogon source code locally in your project, use this method -# to add drogon add_subdirectory(nitro_deps) -# target_link_libraries(${PROJECT_NAME} PRIVATE nitro_deps) +# to add drogon add_subdirectory(cortex-cpp-deps) +# target_link_libraries(${PROJECT_NAME} PRIVATE cortex-cpp-deps) # # and comment out the following lines diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 960bb198a..9f4c98d1b 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -14,8 +14,8 @@ all: # Build the Cortex engine build: ifeq ($(OS),Windows_NT) - @powershell -Command "cmake -S ./nitro_deps -B ./build_deps/nitro_deps;" - @powershell -Command "cmake --build ./build_deps/nitro_deps --config Release -j4;" + @powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps;" + @powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;" @powershell -Command "mkdir -p build; 
cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;" else ifeq ($(shell uname -s),Linux) @./install_deps.sh; @@ -31,23 +31,23 @@ endif package: ifeq ($(OS),Windows_NT) - @powershell -Command "mkdir -p cortex\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex\engines\cortex.llamacpp\;" - @powershell -Command "cp build\Release\nitro.exe .\cortex\;" - @powershell -Command "cp build_deps\_install\bin\zlib.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex\;" - @powershell -Command "7z a -ttar temp.tar cortex\\*; 7z a -tgzip cortex.tar.gz temp.tar;" + @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" + @powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;" + @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" + @powershell -Command "7z a -ttar temp.tar cortex-cpp\\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;" else ifeq ($(shell uname -s),Linux) - @mkdir -p cortex/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.so cortex/engines/cortex.llamacpp/; \ - cp build/nitro cortex/; \ - tar -czvf cortex.tar.gz cortex; + @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ + cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \ + cp build/cortex-cpp cortex-cpp/; \ + tar -czvf cortex-cpp.tar.gz cortex-cpp; else - @mkdir -p cortex/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.dylib cortex/engines/cortex.llamacpp/; \ - cp build/nitro cortex/; \ - tar -czvf cortex.llamacpp.tar.gz cortex; + @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ + cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \ + cp build/cortex-cpp cortex-cpp/; \ + tar -czvf cortex-cpp.tar.gz cortex-cpp; endif run-e2e-test: @@ -56,13 +56,13 @@ ifeq ($(RUN_TESTS),false) @exit 0 endif ifeq ($(OS),Windows_NT) - @powershell -Command "cd cortex; ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);" + @powershell -Command "cd cortex-cpp; ..\..\.github\scripts\e2e-test-llama-windows.bat cortex-cpp.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);" else ifeq ($(shell uname -s),Linux) - @cd cortex; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ + @cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ rm -rf uploads/; else - @cd cortex; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ + @cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) 
$(EMBEDDING_MODEL_URL); \ rm -rf uploads/; endif \ No newline at end of file diff --git a/cortex-cpp/README.md b/cortex-cpp/README.md index f10301363..009c0254f 100644 --- a/cortex-cpp/README.md +++ b/cortex-cpp/README.md @@ -1,14 +1,14 @@ -# Nitro - Embeddable AI +# cortex-cpp - Embeddable AI

[nitro logo]

Documentation - API Reference - Changelog - Bug reports - Discord

-> ⚠️ **Nitro is currently in Development**: Expect breaking changes and bugs!
+> ⚠️ **cortex-cpp is currently in Development**: Expect breaking changes and bugs!

 ## Features
 - Fast Inference: Built on top of the cutting-edge inference library llama.cpp, modified to be production ready.
@@ -17,11 +17,11 @@
 - Quick Setup: Approximately 10-second initialization for swift deployment.
 - Enhanced Web Framework: Incorporates drogon cpp to boost web service efficiency.

-## About Nitro
+## About cortex-cpp

-Nitro is a high-efficiency C++ inference engine for edge computing, powering [Jan](https://jan.ai/). It is lightweight and embeddable, ideal for product integration.
+cortex-cpp is a high-efficiency C++ inference engine for edge computing, powering [Jan](https://jan.ai/). It is lightweight and embeddable, ideal for product integration.

-The binary of nitro after zipped is only ~3mb in size with none to minimal dependencies (if you use a GPU need CUDA for example) make it desirable for any edge/server deployment 👍.
+Zipped, the cortex-cpp binary is only ~3 MB, with no to minimal dependencies (CUDA is required only for GPU use, for example), making it well suited to any edge/server deployment 👍.

 > Read more about Nitro at https://nitro.jan.ai/

@@ -32,13 +32,13 @@ The binary of nitro after zipped is only ~3mb in size with none to minimal depen
 ├── controllers
 ├── docs
 ├── llama.cpp -> Upstream llama C++
-├── nitro_deps -> Dependencies of the Nitro project as a sub-project
+├── cortex-cpp-deps -> Dependencies of the cortex-cpp project as a sub-project
 └── utils
 ```

 ## Quickstart

-**Step 1: Install Nitro**
+**Step 1: Install cortex-cpp**

 - For Linux and MacOS

@@ -59,16 +59,16 @@ mkdir model && cd model
 wget -O llama-2-7b-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true
 ```

-**Step 3: Run Nitro server**
+**Step 3: Run cortex-cpp server**

-```bash title="Run Nitro server"
-nitro
+```bash title="Run cortex-cpp server"
+cortex-cpp
 ```

 **Step 4: Load model**

 ```bash title="Load model"
-curl http://localhost:3928/inferences/llamacpp/loadmodel \
+curl http://localhost:3928/inferences/server/loadmodel \
   -H 'Content-Type: application/json' \
   -d '{
     "llama_model_path": "/model/llama-2-7b-model.gguf",
@@ -79,7 +79,7 @@ curl http://localhost:3928/inferences/llamacpp/loadmodel \

 **Step 5: Making an Inference**

-```bash title="Nitro Inference"
+```bash title="cortex-cpp Inference"
 curl http://localhost:3928/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
@@ -118,17 +118,17 @@ Table of parameters

 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal

 ```zsh
-./nitro 1 127.0.0.1 5000 ([thread_num] [host] [port] [uploads_folder_path])
+./cortex-cpp 1 127.0.0.1 5000 ([thread_num] [host] [port] [uploads_folder_path])
 ```

-- thread_num : the number of thread that nitro webserver needs to have
+- thread_num : the number of threads the cortex-cpp web server should use
 - host : host value normally 127.0.0.1 or 0.0.0.0
-- port : the port that nitro got deployed onto
+- port : the port cortex-cpp listens on
 - uploads_folder_path: custom path for file uploads in Drogon.

-Nitro server is compatible with the OpenAI format, so you can expect the same output as the OpenAI ChatGPT API.
+The cortex-cpp server is compatible with the OpenAI format, so you can expect the same output as the OpenAI ChatGPT API.
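For example, with the server started on port 5000 as above and a model already loaded, the same Step 5 request can target the custom port (a minimal sketch; the port and message are illustrative):

```bash title="Inference on a custom port"
# Assumes ./cortex-cpp 1 127.0.0.1 5000 is running and a model was loaded
# via /inferences/server/loadmodel as in Step 4.
curl http://127.0.0.1:5000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "Hello"}
    ]
  }'
```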
## Compile from source -To compile nitro please visit [Compile from source](docs/docs/new/build-source.md) +To compile cortex-cpp please visit [Compile from source](docs/docs/new/build-source.md) ## Download @@ -188,7 +188,7 @@ To compile nitro please visit [Compile from source](docs/docs/new/build-source.m -Download the latest version of Nitro at https://nitro.jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/nitro/releases)** to download any previous release. +Download the latest version of Nitro at https://nitro.jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/cortex/releases)** to download any previous release. ## Nightly Build diff --git a/cortex-cpp/controllers/health.cc b/cortex-cpp/controllers/health.cc index 196d6de47..db4df4b22 100644 --- a/cortex-cpp/controllers/health.cc +++ b/cortex-cpp/controllers/health.cc @@ -1,12 +1,12 @@ #include "health.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" void health::asyncHandleHttpRequest( const HttpRequestPtr &req, std::function &&callback) { - auto resp = nitro_utils::nitroHttpResponse(); + auto resp = cortex_utils::nitroHttpResponse(); resp->setStatusCode(k200OK); resp->setContentTypeCode(CT_TEXT_HTML); - resp->setBody("Nitro is alive!!!"); + resp->setBody("cortex-cpp is alive!!!"); callback(resp); } diff --git a/cortex-cpp/controllers/llamaCPP.cc b/cortex-cpp/controllers/server.cc similarity index 85% rename from cortex-cpp/controllers/llamaCPP.cc rename to cortex-cpp/controllers/server.cc index d3e489072..af8a91df8 100644 --- a/cortex-cpp/controllers/llamaCPP.cc +++ b/cortex-cpp/controllers/server.cc @@ -1,4 +1,4 @@ -#include "llamaCPP.h" +#include "server.h" #include #include @@ -6,7 +6,7 @@ #include "trantor/utils/Logger.h" #include "utils/logging_utils.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" using namespace inferences; using json = nlohmann::json; @@ -16,7 +16,7 @@ constexpr static auto kLlamaEngine = "cortex.llamacpp"; constexpr static auto kLlamaLibPath = "./engines/cortex.llamacpp"; } // namespace -llamaCPP::llamaCPP() +server::server() : engine_{nullptr} { // Some default values for now below @@ -25,15 +25,15 @@ llamaCPP::llamaCPP() // system () }; -llamaCPP::~llamaCPP() {} +server::~server() {} -void llamaCPP::ChatCompletion( +void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -57,13 +57,13 @@ void llamaCPP::ChatCompletion( LOG_TRACE << "Done chat completion"; } -void llamaCPP::Embedding( +void server::Embedding( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -81,13 +81,13 @@ void llamaCPP::Embedding( LOG_TRACE << "Done embedding"; } -void llamaCPP::UnloadModel( +void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = 
cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -97,7 +97,7 @@ void llamaCPP::UnloadModel( engine_->UnloadModel( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -105,13 +105,13 @@ void llamaCPP::UnloadModel( LOG_TRACE << "Done unload model"; } -void llamaCPP::ModelStatus( +void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -122,7 +122,7 @@ void llamaCPP::ModelStatus( engine_->GetModelStatus( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -130,7 +130,7 @@ void llamaCPP::ModelStatus( LOG_TRACE << "Done get model status"; } -void llamaCPP::LoadModel( +void server::LoadModel( const HttpRequestPtr& req, std::function&& callback) { auto engine_type = @@ -157,7 +157,7 @@ void llamaCPP::LoadModel( if (!dylib_) { Json::Value res; res["message"] = "Could not load engine " + cur_engine_name_; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); return; @@ -171,7 +171,7 @@ void llamaCPP::LoadModel( engine_->LoadModel( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -179,7 +179,7 @@ void llamaCPP::LoadModel( LOG_TRACE << "Done load model"; } -void llamaCPP::ProcessStreamRes(std::function cb, +void server::ProcessStreamRes(std::function cb, std::shared_ptr q) { auto err_or_done = std::make_shared(false); auto chunked_content_provider = @@ -208,21 +208,21 @@ void llamaCPP::ProcessStreamRes(std::function cb, return n; }; - auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider, + auto resp = cortex_utils::nitroStreamResponse(chunked_content_provider, "chat_completions.txt"); cb(resp); } -void llamaCPP::ProcessNonStreamRes( +void server::ProcessNonStreamRes( std::function cb, SyncQueue& q) { auto [status, res] = q.wait_and_pop(); - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); } -bool llamaCPP::IsEngineLoaded() { +bool server::IsEngineLoaded() { return !!engine_; } diff --git a/cortex-cpp/controllers/llamaCPP.h b/cortex-cpp/controllers/server.h similarity index 81% rename from cortex-cpp/controllers/llamaCPP.h rename to cortex-cpp/controllers/server.h index 691b20cb4..3e12ab38a 100644 --- a/cortex-cpp/controllers/llamaCPP.h +++ b/cortex-cpp/controllers/server.h @@ -30,30 +30,30 @@ using namespace drogon; namespace inferences { -class llamaCPP : public 
drogon::HttpController, +class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, public BaseEmbedding { struct SyncQueue; public: - llamaCPP(); - ~llamaCPP(); + server(); + ~server(); METHOD_LIST_BEGIN // list path definitions here; - METHOD_ADD(llamaCPP::ChatCompletion, "chat_completion", Post); - METHOD_ADD(llamaCPP::Embedding, "embedding", Post); - METHOD_ADD(llamaCPP::LoadModel, "loadmodel", Post); - METHOD_ADD(llamaCPP::UnloadModel, "unloadmodel", Get); - METHOD_ADD(llamaCPP::ModelStatus, "modelstatus", Get); + METHOD_ADD(server::ChatCompletion, "chat_completion", Post); + METHOD_ADD(server::Embedding, "embedding", Post); + METHOD_ADD(server::LoadModel, "loadmodel", Post); + METHOD_ADD(server::UnloadModel, "unloadmodel", Get); + METHOD_ADD(server::ModelStatus, "modelstatus", Get); // Openai compatible path - ADD_METHOD_TO(llamaCPP::ChatCompletion, "/v1/chat/completions", Post); - // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options); + ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post); + // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options); // NOTE: prelight will be added back when browser support is properly planned - ADD_METHOD_TO(llamaCPP::Embedding, "/v1/embeddings", Post); - // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options); + ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post); + // ADD_METHOD_TO(server::handlePrelight, "/v1/embeddings", Options); // PATH_ADD("/llama/chat_completion", Post); METHOD_LIST_END diff --git a/cortex-cpp/nitro_deps/.gitignore b/cortex-cpp/cortex-cpp-deps/.gitignore similarity index 100% rename from cortex-cpp/nitro_deps/.gitignore rename to cortex-cpp/cortex-cpp-deps/.gitignore diff --git a/cortex-cpp/nitro_deps/CMakeLists.txt b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt similarity index 95% rename from cortex-cpp/nitro_deps/CMakeLists.txt rename to cortex-cpp/cortex-cpp-deps/CMakeLists.txt index 667024cd7..4e080a026 100644 --- a/cortex-cpp/nitro_deps/CMakeLists.txt +++ b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt @@ -9,7 +9,7 @@ if(UNIX AND NOT APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(THIRD_PARTY_INSTALL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/_install) +set(THIRD_PARTY_INSTALL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../build-deps/_install) #if(NOT THIRD_PARTY_INSTALL_PATH ) # message(FATAL_ERROR "TRITON_THIRD_PARTY_INSTALL_PREFIX must be set") #endif() # TRITON_THIRD_PARTY_INSTALL_PREFIX @@ -96,7 +96,7 @@ ExternalProject_Add( if(WIN32) # Fix trantor cmakelists to link c-ares on Windows - set(TRANTOR_CMAKE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/nitro_deps/drogon-prefix/src/drogon/trantor/CMakeLists.txt) + set(TRANTOR_CMAKE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../build-deps/cortex-cpp-deps/drogon-prefix/src/drogon/trantor/CMakeLists.txt) ExternalProject_Add_Step(drogon trantor_custom_target COMMAND ${CMAKE_COMMAND} -E echo add_definitions(-DCARES_STATICLIB) >> ${TRANTOR_CMAKE_FILE} DEPENDEES download diff --git a/cortex-cpp/nitro_deps/README.md b/cortex-cpp/cortex-cpp-deps/README.md similarity index 100% rename from cortex-cpp/nitro_deps/README.md rename to cortex-cpp/cortex-cpp-deps/README.md diff --git a/cortex-cpp/engines/cortex.llamacpp/engine.cmake b/cortex-cpp/engines/cortex.llamacpp/engine.cmake index be7df6b12..a4922f863 100644 --- a/cortex-cpp/engines/cortex.llamacpp/engine.cmake +++ b/cortex-cpp/engines/cortex.llamacpp/engine.cmake @@ -1,5 +1,5 @@ # 
cortex.llamacpp release version -set(VERSION 0.1.0) +set(VERSION 0.1.1) set(ENGINE_VERSION v${VERSION}) # MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION}) diff --git a/cortex-cpp/examples/interface/README.md b/cortex-cpp/examples/interface/README.md index 6d4df5aae..13f1518f3 100644 --- a/cortex-cpp/examples/interface/README.md +++ b/cortex-cpp/examples/interface/README.md @@ -1,4 +1,4 @@ -This guide provides instructions to create a chatbot powered by Nitro using the GGUF model. +This guide provides instructions to create a chatbot powered by cortex-cpp using the GGUF model. ## Step 1: Download the Model @@ -34,7 +34,7 @@ Now, you'll set up the model in your application. ```zsh title="Example Configuration" {2} dat = { - "llama_model_path": "nitro/interface/models/zephyr-7b-beta.Q5_K_M.gguf", + "llama_model_path": "cortex-cpp/interface/models/zephyr-7b-beta.Q5_K_M.gguf", "ctx_len": 2048, "ngl": 100, "embedding": True, @@ -44,6 +44,6 @@ Now, you'll set up the model in your application. "ai_prompt": "ASSISTANT: "} ``` -Congratulations! Your Nitro chatbot is now set up. Feel free to experiment with different configuration parameters to tailor the chatbot to your needs. +Congratulations! Your cortex-cpp chatbot is now set up. Feel free to experiment with different configuration parameters to tailor the chatbot to your needs. -For more information on parameter settings and their effects, please refer to Run Nitro(using-nitro) for a comprehensive parameters table. \ No newline at end of file +For more information on parameter settings and their effects, please refer to Run cortex-cpp(using-cortex-cpp) for a comprehensive parameters table. \ No newline at end of file diff --git a/cortex-cpp/examples/interface/app.py b/cortex-cpp/examples/interface/app.py index 3b9e32b05..6a0098130 100644 --- a/cortex-cpp/examples/interface/app.py +++ b/cortex-cpp/examples/interface/app.py @@ -5,8 +5,8 @@ import requests # URLs for model loading and chat completion -load_model_url = "http://localhost:3928/inferences/llamacpp/loadmodel" -chat_completion_url = "http://localhost:3928/inferences/llamacpp/chat_completion" +load_model_url = "http://localhost:3928/inferences/server/loadmodel" +chat_completion_url = "http://localhost:3928/inferences/server/chat_completion" headers = { 'Content-Type': 'application/json' @@ -15,7 +15,7 @@ # Function to load the model def load_model(): load_data = { - "llama_model_path": "nitro/model/llama-2-7b-chat.Q5_K_M.gguf?download=true" + "llama_model_path": "cortex-cpp/model/llama-2-7b-chat.Q5_K_M.gguf?download=true" # Add other necessary parameters if required } response = requests.post(load_model_url, headers=headers, data=json.dumps(load_data)) @@ -43,7 +43,7 @@ def add_file(history, file): def bot(history): last_message = history[-1][0] if history else "" dat = { - "llama_model_path": "nitro/model/llama-2-7b-chat.Q5_K_M.gguf?download=true", + "llama_model_path": "cortex-cpp/model/llama-2-7b-chat.Q5_K_M.gguf?download=true", "messages": [ { "role": "user", @@ -73,7 +73,7 @@ def bot(history): [], elem_id="chatbot", bubble_full_width=False, - avatar_images=(None, (os.path.join(os.path.dirname(__file__), "nitro/example/avatar.png"))), + avatar_images=(None, (os.path.join(os.path.dirname(__file__), "cortex-cpp/example/avatar.png"))), ) with gr.Row(): @@ -89,4 +89,4 @@ def bot(history): # Launch the application if __name__ == "__main__": demo.queue() - demo.launch(allowed_paths=["nitro/example/avatar.png"]) \ No newline at end of file + 
demo.launch(allowed_paths=["cortex-cpp/example/avatar.png"]) \ No newline at end of file diff --git a/cortex-cpp/install.bat b/cortex-cpp/install.bat index 919204b60..fc2faa4b1 100644 --- a/cortex-cpp/install.bat +++ b/cortex-cpp/install.bat @@ -1,10 +1,10 @@ @echo off setlocal -:: Remove existing nitro directory if it exists -if exist "%APPDATA%\nitro" ( - echo Removing existing Nitro installation... - rmdir /S /Q "%APPDATA%\nitro" +:: Remove existing cortex-cpp directory if it exists +if exist "%APPDATA%\cortex-cpp" ( + echo Removing existing cortex-cpp installation... + rmdir /S /Q "%APPDATA%\cortex-cpp" ) :: Parse arguments @@ -40,34 +40,34 @@ echo %VERSION% :: Get the release if "%VERSION%"=="latest" ( - :: If the version is set to "latest", get the latest version number from the Nitro GitHub repository - for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/nitro/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" + :: If the version is set to "latest", get the latest version number from the cortex-cpp GitHub repository + for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" ) :: Construct the download URL -set "URL=https://github.com/janhq/nitro/releases/download/v%VERSION%/nitro-%VERSION%-win-amd64%AVX%" +set "URL=https://github.com/janhq/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%" if "%GPU%"=="true" ( :: If --gpu option is provided, append -cuda to the URL set "URL=%URL%-cuda" ) set "URL=%URL%.tar.gz" -:: Download and extract nitro -echo Downloading Nitro from: %URL% -powershell -Command "Invoke-WebRequest -OutFile '%TEMP%\nitro.tar.gz' '%URL%'" -echo Extracting Nitro... -powershell -Command "mkdir '%APPDATA%\nitro'" -powershell -Command "tar -zxvf '%TEMP%\nitro.tar.gz' -C '%APPDATA%\nitro'" +:: Download and extract cortex-cpp +echo Downloading cortex-cpp from: %URL% +powershell -Command "Invoke-WebRequest -OutFile '%TEMP%\cortex-cpp.tar.gz' '%URL%'" +echo Extracting cortex-cpp... +powershell -Command "mkdir '%APPDATA%\cortex-cpp'" +powershell -Command "tar -zxvf '%TEMP%\cortex-cpp.tar.gz' -C '%APPDATA%\cortex-cpp'" -:: Add nitro to the PATH -setx PATH "%APPDATA%\nitro;%PATH%" +:: Add cortex-cpp to the PATH +setx PATH "%APPDATA%\cortex-cpp;%PATH%" -:: Create uninstallnitro.bat -echo @echo off > "%APPDATA%\nitro\uninstallnitro.bat" -echo setx PATH "%PATH:;%APPDATA%\nitro=;%"" >> "%APPDATA%\nitro\uninstallnitro.bat" -echo rmdir /S /Q "%APPDATA%\nitro" >> "%APPDATA%\nitro\uninstallnitro.bat" +:: Create uninstallcortex-cpp.bat +echo @echo off > "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" +echo setx PATH "%PATH:;%APPDATA%\cortex-cpp=;%"" >> "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" +echo rmdir /S /Q "%APPDATA%\cortex-cpp" >> "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" :: Clean up -del %TEMP%\nitro.tar.gz +del %TEMP%\cortex-cpp.tar.gz endlocal diff --git a/cortex-cpp/install.sh b/cortex-cpp/install.sh index 34bc29dc3..ee3499f31 100644 --- a/cortex-cpp/install.sh +++ b/cortex-cpp/install.sh @@ -51,17 +51,17 @@ determine_avx_support() { fi } -# Function to download and install nitro -install_nitro() { - rm -rf /tmp/nitro - rm /tmp/nitro.tar.gz - echo "Downloading Nitro version $VERSION... 
from $1" - curl -sL "$1" -o /tmp/nitro.tar.gz - tar -xzvf /tmp/nitro.tar.gz -C /tmp - ls /tmp/nitro +# Function to download and install cortex-cpp +install_cortex-cpp() { + rm -rf /tmp/cortex-cpp + rm /tmp/cortex-cpp.tar.gz + echo "Downloading cortex-cpp version $VERSION... from $1" + curl -sL "$1" -o /tmp/cortex-cpp.tar.gz + tar -xzvf /tmp/cortex-cpp.tar.gz -C /tmp + ls /tmp/cortex-cpp # Copying files to /usr/local/bin - for file in /tmp/nitro/*; do + for file in /tmp/cortex-cpp/*; do chmod +x "$file" cp "$file" /usr/local/bin/ done @@ -69,18 +69,18 @@ install_nitro() { # Function to create uninstall script create_uninstall_script() { - echo '#!/bin/bash' > /tmp/uninstall_nitro.sh - echo 'if [ "$(id -u)" != "0" ]; then' >> /tmp/uninstall_nitro.sh - echo ' echo "This script must be run as root. Please run again with sudo."' >> /tmp/uninstall_nitro.sh - echo ' exit 1' >> /tmp/uninstall_nitro.sh - echo 'fi' >> /tmp/uninstall_nitro.sh - for file in /tmp/nitro/*; do - echo "rm /usr/local/bin/$(basename "$file")" >> /tmp/uninstall_nitro.sh + echo '#!/bin/bash' > /tmp/uninstall_cortex-cpp.sh + echo 'if [ "$(id -u)" != "0" ]; then' >> /tmp/uninstall_cortex-cpp.sh + echo ' echo "This script must be run as root. Please run again with sudo."' >> /tmp/uninstall_cortex-cpp.sh + echo ' exit 1' >> /tmp/uninstall_cortex-cpp.sh + echo 'fi' >> /tmp/uninstall_cortex-cpp.sh + for file in /tmp/cortex-cpp/*; do + echo "rm /usr/local/bin/$(basename "$file")" >> /tmp/uninstall_cortex-cpp.sh done - echo "rm /usr/local/bin/uninstall_nitro.sh" >> /tmp/uninstall_nitro.sh - echo 'echo "Nitro remove successfully."' >> /tmp/uninstall_nitro.sh - chmod +x /tmp/uninstall_nitro.sh - mv /tmp/uninstall_nitro.sh /usr/local/bin/ + echo "rm /usr/local/bin/uninstall_cortex-cpp.sh" >> /tmp/uninstall_cortex-cpp.sh + echo 'echo "cortex-cpp remove successfully."' >> /tmp/uninstall_cortex-cpp.sh + chmod +x /tmp/uninstall_cortex-cpp.sh + mv /tmp/uninstall_cortex-cpp.sh /usr/local/bin/ } # Determine OS and architecture @@ -139,7 +139,7 @@ fi # Construct GitHub API URL and get latest version if not specified if [ "$VERSION" == "latest" ]; then - API_URL="https://api.github.com/repos/janhq/nitro/releases/latest" + API_URL="https://api.github.com/repos/janhq/cortex/releases/latest" VERSION=$(curl -s $API_URL | jq -r ".tag_name" | sed 's/^v//') fi @@ -155,11 +155,11 @@ case $OS in if [ -z "$AVX" ]; then AVX=$(determine_avx_support) fi - FILE_NAME="nitro-${VERSION}-linux-amd64${AVX}${GPU}${CUDA_VERSION}.tar.gz" + FILE_NAME="cortex-cpp-${VERSION}-linux-amd64${AVX}${GPU}${CUDA_VERSION}.tar.gz" ;; Darwin) ARCH_FORMAT="mac-universal" - FILE_NAME="nitro-${VERSION}-${ARCH_FORMAT}.tar.gz" + FILE_NAME="cortex-cpp-${VERSION}-${ARCH_FORMAT}.tar.gz" ;; *) echo "Unsupported OS." @@ -167,7 +167,7 @@ case $OS in ;; esac -DOWNLOAD_URL="https://github.com/janhq/nitro/releases/download/v${VERSION}/${FILE_NAME}" +DOWNLOAD_URL="https://github.com/janhq/cortex/releases/download/v${VERSION}/${FILE_NAME}" # Check AVX support if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then @@ -175,12 +175,12 @@ if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then exit 1 fi -# Remove existing Nitro installation -echo "Removing existing Nitro installation..." -rm -rf /usr/local/bin/nitro +# Remove existing cortex-cpp installation +echo "Removing existing cortex-cpp installation..." 
+rm -rf /usr/local/bin/cortex-cpp # Download, install, and create uninstall script -install_nitro "$DOWNLOAD_URL" +install_cortex-cpp "$DOWNLOAD_URL" create_uninstall_script -echo "Nitro installed successfully." +echo "cortex-cpp installed successfully." diff --git a/cortex-cpp/install_deps.sh b/cortex-cpp/install_deps.sh index d43257aa0..879b63cd8 100755 --- a/cortex-cpp/install_deps.sh +++ b/cortex-cpp/install_deps.sh @@ -1,3 +1,3 @@ -cmake -S ./nitro_deps -B ./build_deps/nitro_deps -make -C ./build_deps/nitro_deps -j 10 -rm -rf ./build_deps/nitro_deps +cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps +make -C ./build-deps/cortex-cpp-deps -j 10 +rm -rf ./build-deps/cortex-cpp-deps diff --git a/cortex-cpp/main.cc b/cortex-cpp/main.cc index 595186072..53c65cd37 100644 --- a/cortex-cpp/main.cc +++ b/cortex-cpp/main.cc @@ -1,15 +1,15 @@ -#include "utils/nitro_utils.h" -#include // for PATH_MAX #include #include +#include // for PATH_MAX #include +#include "utils/cortex_utils.h" #if defined(__APPLE__) && defined(__MACH__) -#include // for dirname() +#include // for dirname() #include #elif defined(__linux__) -#include // for dirname() -#include // for readlink() +#include // for dirname() +#include // for readlink() #elif defined(_WIN32) #include #undef max @@ -17,13 +17,13 @@ #error "Unsupported platform!" #endif -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; std::string uploads_folder_path; - // Number of nitro threads + // Number of cortex-cpp threads if (argc > 1) { thread_num = std::atoi(argv[1]); } @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) { // Check for port argument if (argc > 3) { - port = std::atoi(argv[3]); // Convert string argument to int + port = std::atoi(argv[3]); // Convert string argument to int } // Uploads folder path @@ -45,11 +45,11 @@ int main(int argc, char *argv[]) { int logical_cores = std::thread::hardware_concurrency(); int drogon_thread_num = std::max(thread_num, logical_cores); - nitro_utils::nitro_logo(); -#ifdef NITRO_VERSION - LOG_INFO << "Nitro version: " << NITRO_VERSION; + // cortex_utils::nitro_logo(); +#ifdef CORTEX_CPP_VERSION + LOG_INFO << "cortex-cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "Nitro version: undefined"; + LOG_INFO << "cortex-cpp version: undefined"; #endif LOG_INFO << "Server started, listening at: " << host << ":" << port; LOG_INFO << "Please load your model"; diff --git a/cortex-cpp/test/components/test_nitro_utils.cc b/cortex-cpp/test/components/test_cortex_utils.cc similarity index 76% rename from cortex-cpp/test/components/test_nitro_utils.cc rename to cortex-cpp/test/components/test_cortex_utils.cc index adf3e976b..2d85f6909 100644 --- a/cortex-cpp/test/components/test_nitro_utils.cc +++ b/cortex-cpp/test/components/test_cortex_utils.cc @@ -1,5 +1,5 @@ #include "gtest/gtest.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" class NitroUtilTest : public ::testing::Test { }; @@ -7,35 +7,35 @@ class NitroUtilTest : public ::testing::Test { TEST_F(NitroUtilTest, left_trim) { { std::string empty; - nitro_utils::ltrim(empty); + cortex_utils::ltrim(empty); EXPECT_EQ(empty, ""); } { std::string s = "abc"; std::string expected = "abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = " abc"; std::string expected = "abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = "1 abc 2 "; std::string expected = "1 abc 
2 "; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = " |abc"; std::string expected = "|abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } } diff --git a/cortex-cpp/utils/nitro_utils.h b/cortex-cpp/utils/cortex_utils.h similarity index 99% rename from cortex-cpp/utils/nitro_utils.h rename to cortex-cpp/utils/cortex_utils.h index c1087b345..2790e2d38 100644 --- a/cortex-cpp/utils/nitro_utils.h +++ b/cortex-cpp/utils/cortex_utils.h @@ -18,7 +18,7 @@ #include #endif -namespace nitro_utils { +namespace cortex_utils { inline std::string models_folder = "./models"; @@ -282,4 +282,4 @@ inline void ltrim(std::string& s) { })); }; -} // namespace nitro_utils +} // namespace cortex_utils