fix navigating + change api path to api-ref #154
Merged (4 commits, Nov 21, 2023)
File renamed without changes.
11 changes: 11 additions & 0 deletions docs/docs/examples/chatbox.md
@@ -0,0 +1,11 @@
---
title: Nitro with Chatbox
---

:::info COMING SOON
:::

<!--
## What is Chatbox?
## How to use Nitro as backend -->
4 changes: 2 additions & 2 deletions docs/docs/features/chat.md
@@ -11,7 +11,7 @@ To send a single query to your chosen LLM, follow these steps:
<div style={{ width: '50%', float: 'left', clear: 'left' }}>

```bash title="Nitro"
- curl http://localhost:3928/inferences/llamacpp/chat_completion \
+ curl http://localhost:3928/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "",
@@ -53,7 +53,7 @@ For ongoing conversations or multiple queries, the dialog request feature is ideal.
<div style={{ width: '50%', float: 'left', clear: 'left' }}>

```bash title="Nitro"
- curl http://localhost:3928/inferences/llamacpp/chat_completion \
+ curl http://localhost:3928/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
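For readers migrating scripts, the change above amounts to swapping the endpoint path. A minimal sketch in Python (stdlib only; the message content is illustrative, since the diff truncates the payload):

```python
import json
from urllib import request

# New OpenAI-compatible path; the old one was /inferences/llamacpp/chat_completion.
url = "http://localhost:3928/v1/chat/completions"

payload = {
    "messages": [
        {"role": "user", "content": "Hello"},  # illustrative message
    ],
}

req = request.Request(
    url,
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)

# With a Nitro server running, send it with:
# with request.urlopen(req) as resp:
#     print(json.load(resp))
```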
2 changes: 1 addition & 1 deletion docs/docs/features/embed.md
@@ -17,7 +17,7 @@ Here’s an example showing how to get the embedding result from the model:
<div style={{ width: '50%', float: 'left', clear: 'left' }}>

```bash title="Nitro" {1}
- curl http://localhost:3928/inferences/llamacpp/embedding \
+ curl http://localhost:3928/v1/embeddings \
-H 'Content-Type: application/json' \
-d '{
"input": "Hello",
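As a hedged sketch (Python, stdlib only; only the `input` field appears in the diff, the remaining fields are elided), the same embedding request built against the renamed path:

```python
import json

def build_embedding_request(text: str, base: str = "http://localhost:3928"):
    """Return (url, body) for an embedding call on the renamed endpoint."""
    # Formerly /inferences/llamacpp/embedding
    url = base + "/v1/embeddings"
    body = json.dumps({"input": text})
    return url, body

url, body = build_embedding_request("Hello")
```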
2 changes: 1 addition & 1 deletion docs/docs/features/prompt.md
@@ -41,7 +41,7 @@ curl http://localhost:3928/inferences/llamacpp/loadmodel \
### Testing the Assistant

```bash title="Pirate Assistant"
- curl http://localhost:3928/inferences/llamacpp/chat_completion \
+ curl http://localhost:3928/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
2 changes: 1 addition & 1 deletion docs/docs/new/about.md
@@ -24,7 +24,7 @@ For instance, compare the Nitro inference call:
<div style={{ width: '50%', float: 'left', clear: 'left' }}>

```bash title="Nitro chat completion"
- curl http://localhost:3928/inferences/llamacpp/chat_completion \
+ curl http://localhost:3928/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-3.5-turbo",
4 changes: 2 additions & 2 deletions docs/docs/new/install.md
@@ -141,7 +141,7 @@ Simple testcase with nitro, after starting the server, you can run the following
"embedding": false
}'
# Send a prompt request to nitro
- curl -s --location 'http://localhost:3928/inferences/llamacpp/chat_completion' \
+ curl -s --location 'http://localhost:3928/v1/chat/completions' \
--header 'Content-Type: application/json' \
--data '{
"messages": [
@@ -172,7 +172,7 @@ Simple testcase with nitro, after starting the server, you can run the following
# Send a prompt request to nitro
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":100,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.7}"
- curl.exe -s -w "%%{http_code}" --location "http://localhost:3928/inferences/llamacpp/chat_completion" ^
+ curl.exe -s -w "%%{http_code}" --location "http://localhost:3928/v1/chat/completions" ^
--header "Content-Type: application/json" ^
--data "%curl_data2%"
```
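The Windows script above hand-escapes the whole JSON payload into `curl_data2`, which is easy to get wrong. A sketch of generating the identical payload with Python's `json.dumps` instead (field values copied from the script):

```python
import json

# Same dialog payload as curl_data2, built as a dict so no manual
# quote-escaping is needed.
payload = {
    "messages": [
        {"content": "Hello there", "role": "assistant"},
        {"content": "Write a long and sad story for me", "role": "user"},
    ],
    "stream": True,
    "model": "gpt-3.5-turbo",
    "max_tokens": 100,
    "stop": ["hello"],
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7,
}
body = json.dumps(payload)
```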
20 changes: 17 additions & 3 deletions docs/docs/new/quickstart.md
@@ -26,7 +26,7 @@ Next, we need to download a model. For this example, we'll use the [Llama2 7B ch
- Create a `/model` and navigate into it:
```bash
mkdir model && cd model
- wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true
+ wget -O llama-2-7b-model.gguf https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf?download=true
```

## Step 3: Run Nitro server
@@ -43,14 +43,28 @@ To check if the Nitro server is running:
curl http://localhost:3928/healthz
```

- ## Step 4: Making an Inference
+ ## Step 4: Load model

To load the model to Nitro server, you need to run:

```bash title="Load model"
curl http://localhost:3928/inferences/llamacpp/loadmodel \
-H 'Content-Type: application/json' \
-d '{
"llama_model_path": "/model/llama-2-7b-model.gguf",
"ctx_len": 512,
"ngl": 100
}'
```
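The same load-model payload can be generated programmatically, which sidesteps JSON slips such as trailing commas. A sketch (values taken from the quickstart; the comments on `ctx_len` and `ngl` reflect their usual meaning for llama.cpp-style servers, not wording from this PR):

```python
import json

# Load-model request body for the quickstart model downloaded in Step 2.
payload = {
    "llama_model_path": "/model/llama-2-7b-model.gguf",
    "ctx_len": 512,  # context window size, in tokens
    "ngl": 100,      # layers to offload to the GPU
}
body = json.dumps(payload)  # always valid JSON: no trailing-comma mistakes
```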

## Step 5: Making an Inference

Finally, let's make an actual inference call using Nitro.

- In your terminal, execute:

```bash title="Nitro Inference"
- curl http://localhost:3928/inferences/llamacpp/chat_completion \
+ curl http://localhost:3928/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"messages": [
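Since the new path is OpenAI-compatible, a streamed response (with `"stream": true` in the request) should arrive as `data:`-prefixed server-sent events, as in the OpenAI API. A hedged sketch of collecting the text deltas; `sample_lines` is fabricated for illustration, not actual Nitro output:

```python
import json

# Fabricated sample of an OpenAI-style SSE stream, for illustration only.
sample_lines = [
    'data: {"choices":[{"delta":{"content":"Hello"}}]}',
    'data: {"choices":[{"delta":{"content":" there"}}]}',
    "data: [DONE]",
]

def collect_deltas(lines):
    """Join the content deltas from OpenAI-style streaming chunks."""
    text = []
    for line in lines:
        if not line.startswith("data: "):
            continue  # skip blank keep-alive lines
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break  # end-of-stream sentinel
        delta = json.loads(chunk)["choices"][0]["delta"]
        text.append(delta.get("content", ""))
    return "".join(text)
```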
2 changes: 1 addition & 1 deletion docs/docusaurus.config.js
@@ -103,7 +103,7 @@ const config = {
{
spec: "openapi/NitroAPI.yaml", // can be local file, url, or parsed json object
// spec: "openapi/OpenAIAPI.yaml",
- route: "/api/",
+ route: "/api-reference/",
},
],
theme: {
4 changes: 2 additions & 2 deletions docs/openapi/NitroAPI.yaml
@@ -134,7 +134,7 @@ paths:
schema:
$ref: "#/components/schemas/StatusResponse"

- /inferences/llamacpp/embedding:
+ /v1/embeddings:
post:
operationId: createEmbedding
tags:
@@ -162,7 +162,7 @@ paths:
schema:
$ref: "#/components/schemas/CreateEmbeddingResponse"

- /inferences/llamacpp/chat_completion:
+ /v1/chat/completions:
post:
operationId: createChatCompletion
tags:
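Both renames in this PR can be captured in one lookup table, which is handy for mechanically migrating scripts that still call the old routes (the helper name here is ours, not part of Nitro):

```python
# Old route -> new OpenAI-compatible route, as changed in this PR.
PATH_RENAMES = {
    "/inferences/llamacpp/chat_completion": "/v1/chat/completions",
    "/inferences/llamacpp/embedding": "/v1/embeddings",
}

def migrate(url: str) -> str:
    """Rewrite a URL that ends in an old route; leave anything else alone."""
    for old, new in PATH_RENAMES.items():
        if url.endswith(old):
            return url[: -len(old)] + new
    return url
```

Note that `/inferences/llamacpp/loadmodel` is not renamed by this PR, so the helper leaves it (and anything else, such as `/healthz`) untouched.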
4 changes: 2 additions & 2 deletions docs/sidebars.js
@@ -49,7 +49,7 @@ const sidebars = {
label: "Guides",
collapsible: false,
collapsed: false,
- items: ["examples/llm"],
+ items: ["examples/chatbox"],
},
// {
// type: "category",
@@ -61,7 +61,7 @@
],

apiSidebar: [
- "api"
+ "api-reference"
],

// communitySidebar: [
3 changes: 0 additions & 3 deletions docs/src/styles/base.scss
@@ -1,7 +1,4 @@
@layer base {
- html {
-   @apply scroll-smooth;
- }
html[data-theme="light"] {
--ifm-background-color: white;
--ifm-color-primary: #2563eb; /* New Primary Blue */