diff --git a/.github/scripts/e2e-test-llama-linux-and-mac.sh b/.github/scripts/e2e-test-llama-linux-and-mac.sh index 5b7b9771d..f235815e8 100644 --- a/.github/scripts/e2e-test-llama-linux-and-mac.sh +++ b/.github/scripts/e2e-test-llama-linux-and-mac.sh @@ -1,7 +1,7 @@ #!/bin/bash ## Example run command -# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf +# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/cortex-cpp/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf # Check for required arguments if [[ $# -ne 3 ]]; then @@ -9,7 +9,7 @@ if [[ $# -ne 3 ]]; then exit 1 fi -rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log +rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/cortex-cpp.log BINARY_PATH=$1 DOWNLOAD_LLM_URL=$2 @@ -22,14 +22,14 @@ range=$((max - min + 1)) PORT=$((RANDOM % range + min)) # Start the binary file -"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/nitro.log & +"$BINARY_PATH" 1 127.0.0.1 $PORT >/tmp/cortex-cpp.log & # Get the process id of the binary file pid=$! if ! ps -p $pid >/dev/null; then - echo "nitro failed to start. Logs:" - cat /tmp/nitro.log + echo "cortex-cpp failed to start. Logs:" + cat /tmp/cortex-cpp.log exit 1 fi @@ -47,7 +47,7 @@ if [[ ! -f "/tmp/test-embedding" ]]; then fi # Run the curl commands -response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm", @@ -57,8 +57,8 @@ response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{ht }') if ! ps -p $pid >/dev/null; then - echo "nitro failed to load model. Logs:" - cat /tmp/nitro.log + echo "cortex-cpp failed to load model. Logs:" + cat /tmp/cortex-cpp.log exit 1 fi @@ -83,14 +83,14 @@ response2=$( ) # unload model -response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \ +response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/unloadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/testllm" }') # load embedding model -response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \ +response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/server/loadModel" \ --header 'Content-Type: application/json' \ --data '{ "llama_model_path": "/tmp/test-embedding", @@ -145,9 +145,9 @@ if [[ "$response5" -ne 200 ]]; then fi if [[ "$error_occurred" -eq 1 ]]; then - echo "Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!" 
- echo "Nitro Error Logs:" - cat /tmp/nitro.log + echo "cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!!" + echo "cortex-cpp Error Logs:" + cat /tmp/cortex-cpp.log kill $pid exit 1 fi @@ -172,7 +172,7 @@ echo "----------------------" echo "Log run test:" cat /tmp/embedding-res.log -echo "Nitro test run successfully!" +echo "cortex-cpp test run successfully!" # Kill the server process kill $pid diff --git a/.github/scripts/e2e-test-llama-windows.bat b/.github/scripts/e2e-test-llama-windows.bat index cddca1e0b..b11e38bb1 100644 --- a/.github/scripts/e2e-test-llama-windows.bat +++ b/.github/scripts/e2e-test-llama-windows.bat @@ -23,7 +23,7 @@ del %TEMP%\response2.log 2>nul del %TEMP%\response3.log 2>nul del %TEMP%\response4.log 2>nul del %TEMP%\response5.log 2>nul -del %TEMP%\nitro.log 2>nul +del %TEMP%\cortex-cpp.log 2>nul set /a min=9999 set /a max=11000 @@ -31,11 +31,11 @@ set /a range=max-min+1 set /a PORT=%min% + %RANDOM% %% %range% rem Start the binary file -start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\nitro.log 2>&1 +start /B "" "%BINARY_PATH%" 1 "127.0.0.1" %PORT% > %TEMP%\cortex-cpp.log 2>&1 ping -n 6 127.0.0.1 %PORT% > nul -rem Capture the PID of the started process with "nitro" in its name +rem Capture the PID of the started process with "cortex-cpp" in its name for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| findstr /B "PID:"') do ( set "pid=%%a" ) @@ -43,8 +43,8 @@ for /f "tokens=2" %%a in ('tasklist /fi "imagename eq %BINARY_NAME%" /fo list ^| echo pid=%pid% if not defined pid ( - echo nitro failed to start. Logs: - type %TEMP%\nitro.log + echo cortex-cpp failed to start. Logs: + type %TEMP%\cortex-cpp.log exit /b 1 ) @@ -76,15 +76,15 @@ echo curl_data4=%curl_data4% echo curl_data5=%curl_data5% rem Run the curl commands and capture the status code -curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^ +curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/chat_completion" ^ --header "Content-Type: application/json" ^ --data "%curl_data2%" > %TEMP%\response2.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1 -curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 +curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w 
"%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/server/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1 curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^ --header "Content-Type: application/json" ^ @@ -130,9 +130,9 @@ if "%response5%" neq "200" ( ) if "%error_occurred%"=="1" ( - echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!! - echo Nitro Error Logs: - type %TEMP%\nitro.log + echo cortex-cpp test run failed!!!!!!!!!!!!!!!!!!!!!! + echo cortex-cpp Error Logs: + type %TEMP%\cortex-cpp.log taskkill /f /pid %pid% exit /b 1 ) @@ -158,8 +158,8 @@ echo ---------------------- echo Log run embedding test: type %TEMP%\response5.log -echo Nitro test run successfully! +echo cortex-cpp test run successfully! rem Kill the server process @REM taskkill /f /pid %pid% -taskkill /f /im nitro.exe 2>nul || exit /B 0 \ No newline at end of file +taskkill /f /im cortex-cpp.exe 2>nul || exit /B 0 \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 356a2a9ca..716dfd679 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -185,8 +185,8 @@ jobs: - name: Upload Artifact uses: actions/upload-artifact@v2 with: - name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }} - path: ./cortex-cpp/cortex + name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex-cpp/cortex-cpp - uses: actions/upload-release-asset@v1.0.1 if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') @@ -194,6 +194,6 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} - asset_path: ./cortex-cpp/cortex.tar.gz - asset_name: cortex-llamacpp-engine-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz + asset_path: ./cortex-cpp/cortex-cpp.tar.gz + asset_name: cortex-cpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz asset_content_type: application/gzip \ No newline at end of file diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml index 82930e0ed..909ab7e77 100644 --- a/.github/workflows/quality-gate.yml +++ b/.github/workflows/quality-gate.yml @@ -159,5 +159,5 @@ jobs: - name: Upload Artifact uses: actions/upload-artifact@v2 with: - name: cortex-llamacpp-engine-${{ matrix.os }}-${{ matrix.name }} - path: ./cortex-cpp/cortex \ No newline at end of file + name: cortex-cpp-${{ matrix.os }}-${{ matrix.name }} + path: ./cortex-cpp/cortex-cpp \ No newline at end of file diff --git a/.github/workflows/update-release-url.yml b/.github/workflows/update-release-url.yml deleted file mode 100644 index 710d23ba7..000000000 --- a/.github/workflows/update-release-url.yml +++ /dev/null @@ -1,50 +0,0 @@ -name: Update Download URLs - -on: - release: - types: - - published - - workflow_dispatch: - -jobs: - update-readme: - runs-on: ubuntu-latest - environment: production - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: "0" - token: ${{ secrets.PAT_SERVICE_ACCOUNT }} - ref: main - - - name: Get Latest Release - uses: pozetroninc/github-action-get-latest-release@v0.7.0 - id: get-latest-release - with: - repository: ${{ github.repository }} - - - name: Update Download URLs in README.md - run: | - echo "Latest Release: ${{ steps.get-latest-release.outputs.release }}" - tag=$(/bin/echo -n "${{ 
steps.get-latest-release.outputs.release }}") - echo "Tag: $tag" - # Remove the v prefix - release=${tag:1} - echo "Release: $release" - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - sed -i "s|||" README.md - - - name: Commit and Push changes - if: github.event_name == 'release' - run: | - git config --global user.email "service@jan.ai" - git config --global user.name "Service Account" - git add README.md - git commit -m "Update README.md with Stable Download URLs" - git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main \ No newline at end of file diff --git a/cortex-cpp/.gitignore b/cortex-cpp/.gitignore index 69c167305..10d117410 100644 --- a/cortex-cpp/.gitignore +++ b/cortex-cpp/.gitignore @@ -85,7 +85,6 @@ CMakeCache.txt CMakeFiles CMakeScripts Testing -!nitro-node/Makefile cmake_install.cmake install_manifest.txt compile_commands.json @@ -561,7 +560,7 @@ FodyWeavers.xsd # End of https://www.toptal.com/developers/gitignore/api/intellij+all,visualstudio,visualstudiocode,cmake,c,c++ build -build_deps +build-deps .DS_Store uploads/** \ No newline at end of file diff --git a/cortex-cpp/CMakeLists.txt b/cortex-cpp/CMakeLists.txt index 97be0e86d..8c01d2256 100644 --- a/cortex-cpp/CMakeLists.txt +++ b/cortex-cpp/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.5) -project(nitro C CXX) +project(cortex-cpp C CXX) include(engines/cortex.llamacpp/engine.cmake) include(CheckIncludeFileCXX) @@ -21,7 +21,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) set(OPENSSL_USE_STATIC_LIBS TRUE) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build_deps/_install) +set(CMAKE_PREFIX_PATH ${CMAKE_CURRENT_SOURCE_DIR}/build-deps/_install) # This is the critical line for installing another package if(LLAMA_CUDA) @@ -35,12 +35,12 @@ if(LLAMA_CUDA) endif() if(DEBUG) - message(STATUS "NITRO DEBUG IS ON") + message(STATUS "CORTEX-CPP DEBUG IS ON") add_compile_definitions(ALLOW_ALL_CORS) endif() -if(NOT DEFINED NITRO_VERSION) - set(NITRO_VERSION "default_version") +if(NOT DEFINED CORTEX_CPP_VERSION) + set(CORTEX_CPP_VERSION "default_version") endif() if(APPLE) @@ -54,7 +54,7 @@ if(APPLE) endif() endif() -add_compile_definitions(NITRO_VERSION="${NITRO_VERSION}") +add_compile_definitions(CORTEX_CPP_VERSION="${CORTEX_CPP_VERSION}") add_subdirectory(test) @@ -62,8 +62,8 @@ add_executable(${PROJECT_NAME} main.cc) # ############################################################################## # If you include the drogon source code locally in your project, use this method -# to add drogon add_subdirectory(nitro_deps) -# target_link_libraries(${PROJECT_NAME} PRIVATE nitro_deps) +# to add drogon add_subdirectory(cortex-cpp-deps) +# target_link_libraries(${PROJECT_NAME} PRIVATE cortex-cpp-deps) # # and comment out the following lines diff --git a/cortex-cpp/Makefile b/cortex-cpp/Makefile index 960bb198a..9f4c98d1b 100644 --- a/cortex-cpp/Makefile +++ b/cortex-cpp/Makefile @@ -14,8 +14,8 @@ all: # Build the Cortex engine build: ifeq ($(OS),Windows_NT) - @powershell -Command "cmake -S ./nitro_deps -B ./build_deps/nitro_deps;" - @powershell -Command "cmake --build ./build_deps/nitro_deps --config Release -j4;" + @powershell -Command "cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps;" + @powershell -Command "cmake --build ./build-deps/cortex-cpp-deps --config Release -j4;" @powershell -Command "mkdir -p build; 
cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release -j4;" else ifeq ($(shell uname -s),Linux) @./install_deps.sh; @@ -31,23 +31,23 @@ endif package: ifeq ($(OS),Windows_NT) - @powershell -Command "mkdir -p cortex\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex\engines\cortex.llamacpp\;" - @powershell -Command "cp build\Release\nitro.exe .\cortex\;" - @powershell -Command "cp build_deps\_install\bin\zlib.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex\;" - @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex\;" - @powershell -Command "7z a -ttar temp.tar cortex\\*; 7z a -tgzip cortex.tar.gz temp.tar;" + @powershell -Command "mkdir -p cortex-cpp\engines\cortex.llamacpp\; cp build\engines\cortex.llamacpp\engine.dll cortex-cpp\engines\cortex.llamacpp\;" + @powershell -Command "cp build\Release\cortex-cpp.exe .\cortex-cpp\;" + @powershell -Command "cp build-deps\_install\bin\zlib.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\msvcp140.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140_1.dll .\cortex-cpp\;" + @powershell -Command "cp ..\.github\patches\windows\vcruntime140.dll .\cortex-cpp\;" + @powershell -Command "7z a -ttar temp.tar cortex-cpp\\*; 7z a -tgzip cortex-cpp.tar.gz temp.tar;" else ifeq ($(shell uname -s),Linux) - @mkdir -p cortex/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.so cortex/engines/cortex.llamacpp/; \ - cp build/nitro cortex/; \ - tar -czvf cortex.tar.gz cortex; + @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ + cp build/engines/cortex.llamacpp/libengine.so cortex-cpp/engines/cortex.llamacpp/; \ + cp build/cortex-cpp cortex-cpp/; \ + tar -czvf cortex-cpp.tar.gz cortex-cpp; else - @mkdir -p cortex/engines/cortex.llamacpp; \ - cp build/engines/cortex.llamacpp/libengine.dylib cortex/engines/cortex.llamacpp/; \ - cp build/nitro cortex/; \ - tar -czvf cortex.llamacpp.tar.gz cortex; + @mkdir -p cortex-cpp/engines/cortex.llamacpp; \ + cp build/engines/cortex.llamacpp/libengine.dylib cortex-cpp/engines/cortex.llamacpp/; \ + cp build/cortex-cpp cortex-cpp/; \ + tar -czvf cortex-cpp.tar.gz cortex-cpp; endif run-e2e-test: @@ -56,13 +56,13 @@ ifeq ($(RUN_TESTS),false) @exit 0 endif ifeq ($(OS),Windows_NT) - @powershell -Command "cd cortex; ..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);" + @powershell -Command "cd cortex-cpp; ..\..\.github\scripts\e2e-test-llama-windows.bat cortex-cpp.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);" else ifeq ($(shell uname -s),Linux) - @cd cortex; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ + @cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ rm -rf uploads/; else - @cd cortex; \ - chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL); \ + @cd cortex-cpp; \ + chmod +x ../../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../../.github/scripts/e2e-test-llama-linux-and-mac.sh ./cortex-cpp $(LLM_MODEL_URL) 
$(EMBEDDING_MODEL_URL); \ rm -rf uploads/; endif \ No newline at end of file diff --git a/cortex-cpp/README.md b/cortex-cpp/README.md index f10301363..009c0254f 100644 --- a/cortex-cpp/README.md +++ b/cortex-cpp/README.md @@ -1,14 +1,14 @@ -# Nitro - Embeddable AI +# cortex-cpp - Embeddable AI

[nitro logo]

Documentation - API Reference - Changelog - Bug reports - Discord

-> ⚠️ **Nitro is currently in Development**: Expect breaking changes and bugs!
+> ⚠️ **cortex-cpp is currently in Development**: Expect breaking changes and bugs!

 ## Features
 - Fast Inference: Built on top of the cutting-edge inference library llama.cpp, modified to be production ready.
@@ -17,11 +17,11 @@
 - Quick Setup: Approximately 10-second initialization for swift deployment.
 - Enhanced Web Framework: Incorporates drogon cpp to boost web service efficiency.

-## About Nitro
+## About cortex-cpp

-Nitro is a high-efficiency C++ inference engine for edge computing, powering [Jan](https://jan.ai/). It is lightweight and embeddable, ideal for product integration.
+cortex-cpp is a high-efficiency C++ inference engine for edge computing, powering [Jan](https://jan.ai/). It is lightweight and embeddable, ideal for product integration.

-The binary of nitro after zipped is only ~3mb in size with none to minimal dependencies (if you use a GPU need CUDA for example) make it desirable for any edge/server deployment 👍.
+Zipped, the cortex-cpp binary is only ~3 MB, with no to minimal dependencies (CUDA is required only for GPU use, for example), making it well suited to any edge/server deployment 👍.

 > Read more about Nitro at https://nitro.jan.ai/

@@ -32,13 +32,13 @@ The binary of nitro after zipped is only ~3mb in size with none to minimal depen
 ├── controllers
 ├── docs
 ├── llama.cpp -> Upstream llama C++
-├── nitro_deps -> Dependencies of the Nitro project as a sub-project
+├── cortex-cpp-deps -> Dependencies of the cortex-cpp project as a sub-project
 └── utils
 ```

 ## Quickstart

-**Step 1: Install Nitro**
+**Step 1: Install cortex-cpp**

 - For Linux and MacOS

@@ -59,16 +59,16 @@ mkdir model && cd model
 wget -O llama-2-7b-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true
 ```

-**Step 3: Run Nitro server**
+**Step 3: Run cortex-cpp server**

-```bash title="Run Nitro server"
-nitro
+```bash title="Run cortex-cpp server"
+cortex-cpp
 ```

 **Step 4: Load model**

 ```bash title="Load model"
-curl http://localhost:3928/inferences/llamacpp/loadmodel \
+curl http://localhost:3928/inferences/server/loadmodel \
   -H 'Content-Type: application/json' \
   -d '{
     "llama_model_path": "/model/llama-2-7b-model.gguf",
@@ -79,7 +79,7 @@ curl http://localhost:3928/inferences/llamacpp/loadmodel \

 **Step 5: Making an Inference**

-```bash title="Nitro Inference"
+```bash title="cortex-cpp Inference"
 curl http://localhost:3928/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
@@ -118,17 +118,17 @@ Table of parameters

 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal

 ```zsh
-./nitro 1 127.0.0.1 5000 ([thread_num] [host] [port] [uploads_folder_path])
+./cortex-cpp 1 127.0.0.1 5000 ([thread_num] [host] [port] [uploads_folder_path])
 ```

-- thread_num : the number of thread that nitro webserver needs to have
+- thread_num : the number of threads the cortex-cpp web server should use
 - host : host value normally 127.0.0.1 or 0.0.0.0
-- port : the port that nitro got deployed onto
+- port : the port cortex-cpp listens on
 - uploads_folder_path: custom path for file uploads in Drogon.

-Nitro server is compatible with the OpenAI format, so you can expect the same output as the OpenAI ChatGPT API.
+The cortex-cpp server is compatible with the OpenAI format, so you can expect the same output as the OpenAI ChatGPT API.
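For example, with the server started on port 5000 as above and a model already loaded, the same Step 5 request can target the custom port (a minimal sketch; the port and message are illustrative):

```bash title="Inference on a custom port"
# Assumes ./cortex-cpp 1 127.0.0.1 5000 is running and a model was loaded
# via /inferences/server/loadmodel as in Step 4.
curl http://127.0.0.1:5000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {"role": "user", "content": "Hello"}
    ]
  }'
```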
## Compile from source -To compile nitro please visit [Compile from source](docs/docs/new/build-source.md) +To compile cortex-cpp please visit [Compile from source](docs/docs/new/build-source.md) ## Download @@ -188,7 +188,7 @@ To compile nitro please visit [Compile from source](docs/docs/new/build-source.m -Download the latest version of Nitro at https://nitro.jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/nitro/releases)** to download any previous release. +Download the latest version of Nitro at https://nitro.jan.ai/ or visit the **[GitHub Releases](https://github.com/janhq/cortex/releases)** to download any previous release. ## Nightly Build diff --git a/cortex-cpp/controllers/health.cc b/cortex-cpp/controllers/health.cc index 196d6de47..db4df4b22 100644 --- a/cortex-cpp/controllers/health.cc +++ b/cortex-cpp/controllers/health.cc @@ -1,12 +1,12 @@ #include "health.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" void health::asyncHandleHttpRequest( const HttpRequestPtr &req, std::function &&callback) { - auto resp = nitro_utils::nitroHttpResponse(); + auto resp = cortex_utils::nitroHttpResponse(); resp->setStatusCode(k200OK); resp->setContentTypeCode(CT_TEXT_HTML); - resp->setBody("Nitro is alive!!!"); + resp->setBody("cortex-cpp is alive!!!"); callback(resp); } diff --git a/cortex-cpp/controllers/llamaCPP.cc b/cortex-cpp/controllers/server.cc similarity index 85% rename from cortex-cpp/controllers/llamaCPP.cc rename to cortex-cpp/controllers/server.cc index d3e489072..af8a91df8 100644 --- a/cortex-cpp/controllers/llamaCPP.cc +++ b/cortex-cpp/controllers/server.cc @@ -1,4 +1,4 @@ -#include "llamaCPP.h" +#include "server.h" #include #include @@ -6,7 +6,7 @@ #include "trantor/utils/Logger.h" #include "utils/logging_utils.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" using namespace inferences; using json = nlohmann::json; @@ -16,7 +16,7 @@ constexpr static auto kLlamaEngine = "cortex.llamacpp"; constexpr static auto kLlamaLibPath = "./engines/cortex.llamacpp"; } // namespace -llamaCPP::llamaCPP() +server::server() : engine_{nullptr} { // Some default values for now below @@ -25,15 +25,15 @@ llamaCPP::llamaCPP() // system () }; -llamaCPP::~llamaCPP() {} +server::~server() {} -void llamaCPP::ChatCompletion( +void server::ChatCompletion( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -57,13 +57,13 @@ void llamaCPP::ChatCompletion( LOG_TRACE << "Done chat completion"; } -void llamaCPP::Embedding( +void server::Embedding( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -81,13 +81,13 @@ void llamaCPP::Embedding( LOG_TRACE << "Done embedding"; } -void llamaCPP::UnloadModel( +void server::UnloadModel( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = 
cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -97,7 +97,7 @@ void llamaCPP::UnloadModel( engine_->UnloadModel( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -105,13 +105,13 @@ void llamaCPP::UnloadModel( LOG_TRACE << "Done unload model"; } -void llamaCPP::ModelStatus( +void server::ModelStatus( const HttpRequestPtr& req, std::function&& callback) { if (!IsEngineLoaded()) { Json::Value res; res["message"] = "Engine is not loaded yet"; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k409Conflict); callback(resp); LOG_WARN << "Engine is not loaded yet"; @@ -122,7 +122,7 @@ void llamaCPP::ModelStatus( engine_->GetModelStatus( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -130,7 +130,7 @@ void llamaCPP::ModelStatus( LOG_TRACE << "Done get model status"; } -void llamaCPP::LoadModel( +void server::LoadModel( const HttpRequestPtr& req, std::function&& callback) { auto engine_type = @@ -157,7 +157,7 @@ void llamaCPP::LoadModel( if (!dylib_) { Json::Value res; res["message"] = "Could not load engine " + cur_engine_name_; - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode(k500InternalServerError); callback(resp); return; @@ -171,7 +171,7 @@ void llamaCPP::LoadModel( engine_->LoadModel( req->getJsonObject(), [cb = std::move(callback)](Json::Value status, Json::Value res) { - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); @@ -179,7 +179,7 @@ void llamaCPP::LoadModel( LOG_TRACE << "Done load model"; } -void llamaCPP::ProcessStreamRes(std::function cb, +void server::ProcessStreamRes(std::function cb, std::shared_ptr q) { auto err_or_done = std::make_shared(false); auto chunked_content_provider = @@ -208,21 +208,21 @@ void llamaCPP::ProcessStreamRes(std::function cb, return n; }; - auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider, + auto resp = cortex_utils::nitroStreamResponse(chunked_content_provider, "chat_completions.txt"); cb(resp); } -void llamaCPP::ProcessNonStreamRes( +void server::ProcessNonStreamRes( std::function cb, SyncQueue& q) { auto [status, res] = q.wait_and_pop(); - auto resp = nitro_utils::nitroHttpJsonResponse(res); + auto resp = cortex_utils::nitroHttpJsonResponse(res); resp->setStatusCode( static_cast(status["status_code"].asInt())); cb(resp); } -bool llamaCPP::IsEngineLoaded() { +bool server::IsEngineLoaded() { return !!engine_; } diff --git a/cortex-cpp/controllers/llamaCPP.h b/cortex-cpp/controllers/server.h similarity index 81% rename from cortex-cpp/controllers/llamaCPP.h rename to cortex-cpp/controllers/server.h index 691b20cb4..3e12ab38a 100644 --- a/cortex-cpp/controllers/llamaCPP.h +++ b/cortex-cpp/controllers/server.h @@ -30,30 +30,30 @@ using namespace drogon; namespace inferences { -class llamaCPP : public 
drogon::HttpController, +class server : public drogon::HttpController, public BaseModel, public BaseChatCompletion, public BaseEmbedding { struct SyncQueue; public: - llamaCPP(); - ~llamaCPP(); + server(); + ~server(); METHOD_LIST_BEGIN // list path definitions here; - METHOD_ADD(llamaCPP::ChatCompletion, "chat_completion", Post); - METHOD_ADD(llamaCPP::Embedding, "embedding", Post); - METHOD_ADD(llamaCPP::LoadModel, "loadmodel", Post); - METHOD_ADD(llamaCPP::UnloadModel, "unloadmodel", Get); - METHOD_ADD(llamaCPP::ModelStatus, "modelstatus", Get); + METHOD_ADD(server::ChatCompletion, "chat_completion", Post); + METHOD_ADD(server::Embedding, "embedding", Post); + METHOD_ADD(server::LoadModel, "loadmodel", Post); + METHOD_ADD(server::UnloadModel, "unloadmodel", Get); + METHOD_ADD(server::ModelStatus, "modelstatus", Get); // Openai compatible path - ADD_METHOD_TO(llamaCPP::ChatCompletion, "/v1/chat/completions", Post); - // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/chat/completions", Options); + ADD_METHOD_TO(server::ChatCompletion, "/v1/chat/completions", Post); + // ADD_METHOD_TO(server::handlePrelight, "/v1/chat/completions", Options); // NOTE: prelight will be added back when browser support is properly planned - ADD_METHOD_TO(llamaCPP::Embedding, "/v1/embeddings", Post); - // ADD_METHOD_TO(llamaCPP::handlePrelight, "/v1/embeddings", Options); + ADD_METHOD_TO(server::Embedding, "/v1/embeddings", Post); + // ADD_METHOD_TO(server::handlePrelight, "/v1/embeddings", Options); // PATH_ADD("/llama/chat_completion", Post); METHOD_LIST_END diff --git a/cortex-cpp/nitro_deps/.gitignore b/cortex-cpp/cortex-cpp-deps/.gitignore similarity index 100% rename from cortex-cpp/nitro_deps/.gitignore rename to cortex-cpp/cortex-cpp-deps/.gitignore diff --git a/cortex-cpp/nitro_deps/CMakeLists.txt b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt similarity index 95% rename from cortex-cpp/nitro_deps/CMakeLists.txt rename to cortex-cpp/cortex-cpp-deps/CMakeLists.txt index 667024cd7..4e080a026 100644 --- a/cortex-cpp/nitro_deps/CMakeLists.txt +++ b/cortex-cpp/cortex-cpp-deps/CMakeLists.txt @@ -9,7 +9,7 @@ if(UNIX AND NOT APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") endif() set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(THIRD_PARTY_INSTALL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/_install) +set(THIRD_PARTY_INSTALL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../build-deps/_install) #if(NOT THIRD_PARTY_INSTALL_PATH ) # message(FATAL_ERROR "TRITON_THIRD_PARTY_INSTALL_PREFIX must be set") #endif() # TRITON_THIRD_PARTY_INSTALL_PREFIX @@ -96,7 +96,7 @@ ExternalProject_Add( if(WIN32) # Fix trantor cmakelists to link c-ares on Windows - set(TRANTOR_CMAKE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../build_deps/nitro_deps/drogon-prefix/src/drogon/trantor/CMakeLists.txt) + set(TRANTOR_CMAKE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/../build-deps/cortex-cpp-deps/drogon-prefix/src/drogon/trantor/CMakeLists.txt) ExternalProject_Add_Step(drogon trantor_custom_target COMMAND ${CMAKE_COMMAND} -E echo add_definitions(-DCARES_STATICLIB) >> ${TRANTOR_CMAKE_FILE} DEPENDEES download diff --git a/cortex-cpp/nitro_deps/README.md b/cortex-cpp/cortex-cpp-deps/README.md similarity index 100% rename from cortex-cpp/nitro_deps/README.md rename to cortex-cpp/cortex-cpp-deps/README.md diff --git a/cortex-cpp/engines/cortex.llamacpp/engine.cmake b/cortex-cpp/engines/cortex.llamacpp/engine.cmake index be7df6b12..a4922f863 100644 --- a/cortex-cpp/engines/cortex.llamacpp/engine.cmake +++ b/cortex-cpp/engines/cortex.llamacpp/engine.cmake @@ -1,5 +1,5 @@ # 
cortex.llamacpp release version -set(VERSION 0.1.0) +set(VERSION 0.1.1) set(ENGINE_VERSION v${VERSION}) # MESSAGE("ENGINE_VERSION=" ${ENGINE_VERSION}) diff --git a/cortex-cpp/examples/interface/README.md b/cortex-cpp/examples/interface/README.md index 6d4df5aae..13f1518f3 100644 --- a/cortex-cpp/examples/interface/README.md +++ b/cortex-cpp/examples/interface/README.md @@ -1,4 +1,4 @@ -This guide provides instructions to create a chatbot powered by Nitro using the GGUF model. +This guide provides instructions to create a chatbot powered by cortex-cpp using the GGUF model. ## Step 1: Download the Model @@ -34,7 +34,7 @@ Now, you'll set up the model in your application. ```zsh title="Example Configuration" {2} dat = { - "llama_model_path": "nitro/interface/models/zephyr-7b-beta.Q5_K_M.gguf", + "llama_model_path": "cortex-cpp/interface/models/zephyr-7b-beta.Q5_K_M.gguf", "ctx_len": 2048, "ngl": 100, "embedding": True, @@ -44,6 +44,6 @@ Now, you'll set up the model in your application. "ai_prompt": "ASSISTANT: "} ``` -Congratulations! Your Nitro chatbot is now set up. Feel free to experiment with different configuration parameters to tailor the chatbot to your needs. +Congratulations! Your cortex-cpp chatbot is now set up. Feel free to experiment with different configuration parameters to tailor the chatbot to your needs. -For more information on parameter settings and their effects, please refer to Run Nitro(using-nitro) for a comprehensive parameters table. \ No newline at end of file +For more information on parameter settings and their effects, please refer to Run cortex-cpp(using-cortex-cpp) for a comprehensive parameters table. \ No newline at end of file diff --git a/cortex-cpp/examples/interface/app.py b/cortex-cpp/examples/interface/app.py index 3b9e32b05..6a0098130 100644 --- a/cortex-cpp/examples/interface/app.py +++ b/cortex-cpp/examples/interface/app.py @@ -5,8 +5,8 @@ import requests # URLs for model loading and chat completion -load_model_url = "http://localhost:3928/inferences/llamacpp/loadmodel" -chat_completion_url = "http://localhost:3928/inferences/llamacpp/chat_completion" +load_model_url = "http://localhost:3928/inferences/server/loadmodel" +chat_completion_url = "http://localhost:3928/inferences/server/chat_completion" headers = { 'Content-Type': 'application/json' @@ -15,7 +15,7 @@ # Function to load the model def load_model(): load_data = { - "llama_model_path": "nitro/model/llama-2-7b-chat.Q5_K_M.gguf?download=true" + "llama_model_path": "cortex-cpp/model/llama-2-7b-chat.Q5_K_M.gguf?download=true" # Add other necessary parameters if required } response = requests.post(load_model_url, headers=headers, data=json.dumps(load_data)) @@ -43,7 +43,7 @@ def add_file(history, file): def bot(history): last_message = history[-1][0] if history else "" dat = { - "llama_model_path": "nitro/model/llama-2-7b-chat.Q5_K_M.gguf?download=true", + "llama_model_path": "cortex-cpp/model/llama-2-7b-chat.Q5_K_M.gguf?download=true", "messages": [ { "role": "user", @@ -73,7 +73,7 @@ def bot(history): [], elem_id="chatbot", bubble_full_width=False, - avatar_images=(None, (os.path.join(os.path.dirname(__file__), "nitro/example/avatar.png"))), + avatar_images=(None, (os.path.join(os.path.dirname(__file__), "cortex-cpp/example/avatar.png"))), ) with gr.Row(): @@ -89,4 +89,4 @@ def bot(history): # Launch the application if __name__ == "__main__": demo.queue() - demo.launch(allowed_paths=["nitro/example/avatar.png"]) \ No newline at end of file + 
demo.launch(allowed_paths=["cortex-cpp/example/avatar.png"]) \ No newline at end of file diff --git a/cortex-cpp/install.bat b/cortex-cpp/install.bat index 919204b60..fc2faa4b1 100644 --- a/cortex-cpp/install.bat +++ b/cortex-cpp/install.bat @@ -1,10 +1,10 @@ @echo off setlocal -:: Remove existing nitro directory if it exists -if exist "%APPDATA%\nitro" ( - echo Removing existing Nitro installation... - rmdir /S /Q "%APPDATA%\nitro" +:: Remove existing cortex-cpp directory if it exists +if exist "%APPDATA%\cortex-cpp" ( + echo Removing existing cortex-cpp installation... + rmdir /S /Q "%APPDATA%\cortex-cpp" ) :: Parse arguments @@ -40,34 +40,34 @@ echo %VERSION% :: Get the release if "%VERSION%"=="latest" ( - :: If the version is set to "latest", get the latest version number from the Nitro GitHub repository - for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/nitro/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" + :: If the version is set to "latest", get the latest version number from the cortex-cpp GitHub repository + for /f "delims=" %%i in ('powershell -Command "& {$version = Invoke-RestMethod -Uri 'https://api.github.com/repos/janhq/cortex/releases/latest'; return $version.tag_name.TrimStart('v')}"') do set "VERSION=%%i" ) :: Construct the download URL -set "URL=https://github.com/janhq/nitro/releases/download/v%VERSION%/nitro-%VERSION%-win-amd64%AVX%" +set "URL=https://github.com/janhq/cortex/releases/download/v%VERSION%/cortex-cpp-%VERSION%-win-amd64%AVX%" if "%GPU%"=="true" ( :: If --gpu option is provided, append -cuda to the URL set "URL=%URL%-cuda" ) set "URL=%URL%.tar.gz" -:: Download and extract nitro -echo Downloading Nitro from: %URL% -powershell -Command "Invoke-WebRequest -OutFile '%TEMP%\nitro.tar.gz' '%URL%'" -echo Extracting Nitro... -powershell -Command "mkdir '%APPDATA%\nitro'" -powershell -Command "tar -zxvf '%TEMP%\nitro.tar.gz' -C '%APPDATA%\nitro'" +:: Download and extract cortex-cpp +echo Downloading cortex-cpp from: %URL% +powershell -Command "Invoke-WebRequest -OutFile '%TEMP%\cortex-cpp.tar.gz' '%URL%'" +echo Extracting cortex-cpp... +powershell -Command "mkdir '%APPDATA%\cortex-cpp'" +powershell -Command "tar -zxvf '%TEMP%\cortex-cpp.tar.gz' -C '%APPDATA%\cortex-cpp'" -:: Add nitro to the PATH -setx PATH "%APPDATA%\nitro;%PATH%" +:: Add cortex-cpp to the PATH +setx PATH "%APPDATA%\cortex-cpp;%PATH%" -:: Create uninstallnitro.bat -echo @echo off > "%APPDATA%\nitro\uninstallnitro.bat" -echo setx PATH "%PATH:;%APPDATA%\nitro=;%"" >> "%APPDATA%\nitro\uninstallnitro.bat" -echo rmdir /S /Q "%APPDATA%\nitro" >> "%APPDATA%\nitro\uninstallnitro.bat" +:: Create uninstallcortex-cpp.bat +echo @echo off > "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" +echo setx PATH "%PATH:;%APPDATA%\cortex-cpp=;%"" >> "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" +echo rmdir /S /Q "%APPDATA%\cortex-cpp" >> "%APPDATA%\cortex-cpp\uninstallcortex-cpp.bat" :: Clean up -del %TEMP%\nitro.tar.gz +del %TEMP%\cortex-cpp.tar.gz endlocal diff --git a/cortex-cpp/install.sh b/cortex-cpp/install.sh index 34bc29dc3..ee3499f31 100644 --- a/cortex-cpp/install.sh +++ b/cortex-cpp/install.sh @@ -51,17 +51,17 @@ determine_avx_support() { fi } -# Function to download and install nitro -install_nitro() { - rm -rf /tmp/nitro - rm /tmp/nitro.tar.gz - echo "Downloading Nitro version $VERSION... 
from $1" - curl -sL "$1" -o /tmp/nitro.tar.gz - tar -xzvf /tmp/nitro.tar.gz -C /tmp - ls /tmp/nitro +# Function to download and install cortex-cpp +install_cortex-cpp() { + rm -rf /tmp/cortex-cpp + rm /tmp/cortex-cpp.tar.gz + echo "Downloading cortex-cpp version $VERSION... from $1" + curl -sL "$1" -o /tmp/cortex-cpp.tar.gz + tar -xzvf /tmp/cortex-cpp.tar.gz -C /tmp + ls /tmp/cortex-cpp # Copying files to /usr/local/bin - for file in /tmp/nitro/*; do + for file in /tmp/cortex-cpp/*; do chmod +x "$file" cp "$file" /usr/local/bin/ done @@ -69,18 +69,18 @@ install_nitro() { # Function to create uninstall script create_uninstall_script() { - echo '#!/bin/bash' > /tmp/uninstall_nitro.sh - echo 'if [ "$(id -u)" != "0" ]; then' >> /tmp/uninstall_nitro.sh - echo ' echo "This script must be run as root. Please run again with sudo."' >> /tmp/uninstall_nitro.sh - echo ' exit 1' >> /tmp/uninstall_nitro.sh - echo 'fi' >> /tmp/uninstall_nitro.sh - for file in /tmp/nitro/*; do - echo "rm /usr/local/bin/$(basename "$file")" >> /tmp/uninstall_nitro.sh + echo '#!/bin/bash' > /tmp/uninstall_cortex-cpp.sh + echo 'if [ "$(id -u)" != "0" ]; then' >> /tmp/uninstall_cortex-cpp.sh + echo ' echo "This script must be run as root. Please run again with sudo."' >> /tmp/uninstall_cortex-cpp.sh + echo ' exit 1' >> /tmp/uninstall_cortex-cpp.sh + echo 'fi' >> /tmp/uninstall_cortex-cpp.sh + for file in /tmp/cortex-cpp/*; do + echo "rm /usr/local/bin/$(basename "$file")" >> /tmp/uninstall_cortex-cpp.sh done - echo "rm /usr/local/bin/uninstall_nitro.sh" >> /tmp/uninstall_nitro.sh - echo 'echo "Nitro remove successfully."' >> /tmp/uninstall_nitro.sh - chmod +x /tmp/uninstall_nitro.sh - mv /tmp/uninstall_nitro.sh /usr/local/bin/ + echo "rm /usr/local/bin/uninstall_cortex-cpp.sh" >> /tmp/uninstall_cortex-cpp.sh + echo 'echo "cortex-cpp remove successfully."' >> /tmp/uninstall_cortex-cpp.sh + chmod +x /tmp/uninstall_cortex-cpp.sh + mv /tmp/uninstall_cortex-cpp.sh /usr/local/bin/ } # Determine OS and architecture @@ -139,7 +139,7 @@ fi # Construct GitHub API URL and get latest version if not specified if [ "$VERSION" == "latest" ]; then - API_URL="https://api.github.com/repos/janhq/nitro/releases/latest" + API_URL="https://api.github.com/repos/janhq/cortex/releases/latest" VERSION=$(curl -s $API_URL | jq -r ".tag_name" | sed 's/^v//') fi @@ -155,11 +155,11 @@ case $OS in if [ -z "$AVX" ]; then AVX=$(determine_avx_support) fi - FILE_NAME="nitro-${VERSION}-linux-amd64${AVX}${GPU}${CUDA_VERSION}.tar.gz" + FILE_NAME="cortex-cpp-${VERSION}-linux-amd64${AVX}${GPU}${CUDA_VERSION}.tar.gz" ;; Darwin) ARCH_FORMAT="mac-universal" - FILE_NAME="nitro-${VERSION}-${ARCH_FORMAT}.tar.gz" + FILE_NAME="cortex-cpp-${VERSION}-${ARCH_FORMAT}.tar.gz" ;; *) echo "Unsupported OS." @@ -167,7 +167,7 @@ case $OS in ;; esac -DOWNLOAD_URL="https://github.com/janhq/nitro/releases/download/v${VERSION}/${FILE_NAME}" +DOWNLOAD_URL="https://github.com/janhq/cortex/releases/download/v${VERSION}/${FILE_NAME}" # Check AVX support if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then @@ -175,12 +175,12 @@ if [ -z "$AVX" ] && [ "$OS" == "Linux" ]; then exit 1 fi -# Remove existing Nitro installation -echo "Removing existing Nitro installation..." -rm -rf /usr/local/bin/nitro +# Remove existing cortex-cpp installation +echo "Removing existing cortex-cpp installation..." 
+rm -rf /usr/local/bin/cortex-cpp # Download, install, and create uninstall script -install_nitro "$DOWNLOAD_URL" +install_cortex-cpp "$DOWNLOAD_URL" create_uninstall_script -echo "Nitro installed successfully." +echo "cortex-cpp installed successfully." diff --git a/cortex-cpp/install_deps.sh b/cortex-cpp/install_deps.sh index d43257aa0..879b63cd8 100755 --- a/cortex-cpp/install_deps.sh +++ b/cortex-cpp/install_deps.sh @@ -1,3 +1,3 @@ -cmake -S ./nitro_deps -B ./build_deps/nitro_deps -make -C ./build_deps/nitro_deps -j 10 -rm -rf ./build_deps/nitro_deps +cmake -S ./cortex-cpp-deps -B ./build-deps/cortex-cpp-deps +make -C ./build-deps/cortex-cpp-deps -j 10 +rm -rf ./build-deps/cortex-cpp-deps diff --git a/cortex-cpp/main.cc b/cortex-cpp/main.cc index 595186072..53c65cd37 100644 --- a/cortex-cpp/main.cc +++ b/cortex-cpp/main.cc @@ -1,15 +1,15 @@ -#include "utils/nitro_utils.h" -#include // for PATH_MAX #include #include +#include // for PATH_MAX #include +#include "utils/cortex_utils.h" #if defined(__APPLE__) && defined(__MACH__) -#include // for dirname() +#include // for dirname() #include #elif defined(__linux__) -#include // for dirname() -#include // for readlink() +#include // for dirname() +#include // for readlink() #elif defined(_WIN32) #include #undef max @@ -17,13 +17,13 @@ #error "Unsupported platform!" #endif -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { int thread_num = 1; std::string host = "127.0.0.1"; int port = 3928; std::string uploads_folder_path; - // Number of nitro threads + // Number of cortex-cpp threads if (argc > 1) { thread_num = std::atoi(argv[1]); } @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) { // Check for port argument if (argc > 3) { - port = std::atoi(argv[3]); // Convert string argument to int + port = std::atoi(argv[3]); // Convert string argument to int } // Uploads folder path @@ -45,11 +45,11 @@ int main(int argc, char *argv[]) { int logical_cores = std::thread::hardware_concurrency(); int drogon_thread_num = std::max(thread_num, logical_cores); - nitro_utils::nitro_logo(); -#ifdef NITRO_VERSION - LOG_INFO << "Nitro version: " << NITRO_VERSION; + // cortex_utils::nitro_logo(); +#ifdef CORTEX_CPP_VERSION + LOG_INFO << "cortex-cpp version: " << CORTEX_CPP_VERSION; #else - LOG_INFO << "Nitro version: undefined"; + LOG_INFO << "cortex-cpp version: undefined"; #endif LOG_INFO << "Server started, listening at: " << host << ":" << port; LOG_INFO << "Please load your model"; diff --git a/cortex-cpp/test/components/test_nitro_utils.cc b/cortex-cpp/test/components/test_cortex_utils.cc similarity index 76% rename from cortex-cpp/test/components/test_nitro_utils.cc rename to cortex-cpp/test/components/test_cortex_utils.cc index adf3e976b..2d85f6909 100644 --- a/cortex-cpp/test/components/test_nitro_utils.cc +++ b/cortex-cpp/test/components/test_cortex_utils.cc @@ -1,5 +1,5 @@ #include "gtest/gtest.h" -#include "utils/nitro_utils.h" +#include "utils/cortex_utils.h" class NitroUtilTest : public ::testing::Test { }; @@ -7,35 +7,35 @@ class NitroUtilTest : public ::testing::Test { TEST_F(NitroUtilTest, left_trim) { { std::string empty; - nitro_utils::ltrim(empty); + cortex_utils::ltrim(empty); EXPECT_EQ(empty, ""); } { std::string s = "abc"; std::string expected = "abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = " abc"; std::string expected = "abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = "1 abc 2 "; std::string expected = "1 abc 
2 "; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } { std::string s = " |abc"; std::string expected = "|abc"; - nitro_utils::ltrim(s); + cortex_utils::ltrim(s); EXPECT_EQ(s, expected); } } diff --git a/cortex-cpp/utils/nitro_utils.h b/cortex-cpp/utils/cortex_utils.h similarity index 99% rename from cortex-cpp/utils/nitro_utils.h rename to cortex-cpp/utils/cortex_utils.h index c1087b345..2790e2d38 100644 --- a/cortex-cpp/utils/nitro_utils.h +++ b/cortex-cpp/utils/cortex_utils.h @@ -18,7 +18,7 @@ #include #endif -namespace nitro_utils { +namespace cortex_utils { inline std::string models_folder = "./models"; @@ -282,4 +282,4 @@ inline void ltrim(std::string& s) { })); }; -} // namespace nitro_utils +} // namespace cortex_utils