- cortex
- run
- llama3.1
+ ❯
+ cortex run
+ llama3.2
-
+ Available to download:
+ 1. llama3.2:3b-gguf-q2-k
+ 2. llama3.2:3b-gguf-q3-kl
+ 3. llama3.2:3b-gguf-q3-km
+ 4. llama3.2:3b-gguf-q3-ks
+ 5. llama3.2:3b-gguf-q4-km (default)
+ 6. llama3.2:3b-gguf-q4-ks
+ 7. llama3.2:3b-gguf-q5-km
+ 8. llama3.2:3b-gguf-q5-ks
+ 9. llama3.2:3b-gguf-q6-k
+ 10. llama3.2:3b-gguf-q8-0
+
+ Select a model (1-10): 5
+ {/*
cortex run llama3.1:tensorrt-llm
@@ -65,7 +78,7 @@ const SimpleHeroSection = () => {
cortex run llama3.1:onnx
-
+ */}
From 280c94bf7170fe96cd24b6ceacd32a6731fb5490 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong
Date: Wed, 30 Oct 2024 18:58:10 +0800
Subject: [PATCH 06/10] Remove discord links
---
docs/docs/architecture.mdx | 4 ----
docs/docs/troubleshooting.mdx | 6 +-----
2 files changed, 1 insertion(+), 9 deletions(-)
diff --git a/docs/docs/architecture.mdx b/docs/docs/architecture.mdx
index f2cd598c2..852d7b7bc 100644
--- a/docs/docs/architecture.mdx
+++ b/docs/docs/architecture.mdx
@@ -148,7 +148,3 @@ Our development roadmap outlines key features and epics we will focus on in the
- **RAG**: Improve response quality and contextual relevance in our AI models.
- **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex.
-
-:::info
-For a full list of Cortex development roadmap, please see [here](https://discord.com/channels/1107178041848909847/1230770299730001941).
-:::
diff --git a/docs/docs/troubleshooting.mdx b/docs/docs/troubleshooting.mdx
index e50ab115b..34831e10a 100644
--- a/docs/docs/troubleshooting.mdx
+++ b/docs/docs/troubleshooting.mdx
@@ -152,8 +152,4 @@ lsof -i :1337
kill -9 [pid]
```
-
-
-:::info
-For additional issues not listed above, please contact us on [Discord](https://discord.com/channels/1107178041848909847/1267305972733444147) or submit a [GitHub issue](https://github.com/janhq/cortex/issues). Include your `~/cortex/cortex.log` or an error snippet.
-:::
+
\ No newline at end of file
From 9b7eb44523003d83b26c75d5821b9b917d1f4379 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong
Date: Wed, 30 Oct 2024 19:06:21 +0800
Subject: [PATCH 07/10] Remove beta/nightly/onnx/trt from quickstart
---
docs/docs/quickstart.mdx | 164 +++++----------------------------------
1 file changed, 18 insertions(+), 146 deletions(-)
diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx
index 0dea487ec..c0ba8e2e9 100644
--- a/docs/docs/quickstart.mdx
+++ b/docs/docs/quickstart.mdx
@@ -8,76 +8,51 @@ import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
-:::warning
-🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+:::info
+Cortex.cpp is in active development. If you have any questions, please reach out to us:
+- [GitHub](https://github.com/janhq/cortex.cpp/issues/new/choose)
+- [Discord](https://discord.com/invite/FTk2MvZwJH)
:::
-## Installation
-To install Cortex, download the installer for your operating system from the following options:
-- **Stable Version**
- - [Windows](https://github.com/janhq/cortex.cpp/releases)
- - [Mac](https://github.com/janhq/cortex.cpp/releases)
- - [Linux (Debian)](https://github.com/janhq/cortex.cpp/releases)
- - [Linux (Fedora)](https://github.com/janhq/cortex.cpp/releases)
+## Local Installation
+Cortex has a Local Installer that packages all required dependencies, so that no internet connection is required during the installation process.
+ - [Windows](https://app.cortexcpp.com/download/latest/windows-amd64-local)
+ - [Mac (Universal)](https://app.cortexcpp.com/download/latest/mac-universal-local)
+ - [Linux](https://app.cortexcpp.com/download/latest/linux-amd64-local)
+
## Start Cortex.cpp Processes and API Server
This command starts the Cortex.cpp API server at `localhost:39281`.
```sh
- # Stable
cortex start
-
- # Beta
- cortex-beta start
-
- # Nightly
- cortex-nightly start
```
```sh
- # Stable
cortex.exe start
-
- # Beta
- cortex-beta.exe start
-
- # Nightly
- cortex-nightly.exe start
```
+
## Run a Model
This command downloads the default `gguf` model from the [Cortex Hub](https://huggingface.co/cortexso), starts the model, and lets you chat with it.
```sh
- # Stable
cortex run mistral
-
- # Beta
- cortex-beta run mistral
-
- # Nightly
- cortex-nightly run mistral
```
```sh
- # Stable
cortex.exe run mistral
-
- # Beta
- cortex-beta.exe run mistral
-
- # Nightly
- cortex-nightly.exe run mistral
```
:::info
-All model files are stored in the `~users/cortex/models` folder.
+All model files are stored in the `~/cortex/models` folder.
:::
+
## Using the Model
### API
```curl
@@ -103,153 +78,58 @@ curl http://localhost:39281/v1/chat/completions \
"top_p": 1
}'
```
-### Cortex.js
-```js
-const resp = await cortex.chat.completions.create({
- model: "mistral",
- messages: [
- { role: "system", content: "You are a chatbot." },
- { role: "user", content: "What is the capital of the United States?" },
- ],
- });
-```
-### Cortex.py
-```py
-completion = client.chat.completions.create(
- model=mistral,
- messages=[
- {
- "role": "user",
- "content": "Say this is a test",
- },
- ],
-)
-```
+
## Stop a Model
This command stops the running model.
```sh
- # Stable
cortex models stop mistral
-
- # Beta
- cortex-beta models stop mistral
-
- # Nightly
- cortex-nightly models stop mistral
```
```sh
- # Stable
cortex.exe models stop mistral
-
- # Beta
- cortex-beta.exe models stop mistral
-
- # Nightly
- cortex-nightly.exe models stop mistral
```
+
## Show the System State
This command displays the running model and the hardware system status.
```sh
- # Stable
cortex ps
-
- # Beta
- cortex-beta ps
-
- # Nightly
- cortex-nightly ps
```
```sh
- # Stable
cortex.exe ps
-
- # Beta
- cortex-beta.exe ps
-
- # Nightly
- cortex-nightly.exe ps
```
-## Run Different Model Variants
+
+
## What's Next?
Now that Cortex.cpp is set up, here are the next steps to explore:
@@ -258,11 +138,3 @@ Now that Cortex.cpp is set up, here are the next steps to explore:
2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data.
3. Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp.
4. Integrate Cortex.cpp [libraries](/docs/category/libraries) seamlessly into your Python or JavaScript applications.
-
-
-:::info
-Cortex.cpp is still in early development, so if you have any questions, please reach out to us:
-
-- [GitHub](https://github.com/janhq/cortex)
-- [Discord](https://discord.gg/YFKKeuVu)
- :::
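The API section above posts a chat completion to `http://localhost:39281/v1/chat/completions`. A minimal sketch of that request, assuming the server was started with `cortex start` and the `mistral` model is running; only the endpoint, model name, and `top_p` field come from the patch, the header and message body are illustrative:

```sh
# Minimal sketch of the chat-completions request shown in the quickstart above.
# Assumes `cortex start` and `cortex run mistral` have already been executed.
curl http://localhost:39281/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mistral",
    "messages": [
      { "role": "user", "content": "What is the capital of the United States?" }
    ],
    "top_p": 1
  }'
```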
From e6a5548c18cb3da2e00379f5a1bff2255cc3c65b Mon Sep 17 00:00:00 2001
From: Gabrielle Ong
Date: Wed, 30 Oct 2024 19:46:12 +0800
Subject: [PATCH 08/10] Quickstart additional commands
---
docs/docs/quickstart.mdx | 56 +++++++++++++++++++++++++++++++++-------
1 file changed, 46 insertions(+), 10 deletions(-)
diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx
index c0ba8e2e9..687707c66 100644
--- a/docs/docs/quickstart.mdx
+++ b/docs/docs/quickstart.mdx
@@ -20,7 +20,7 @@ Cortex has a Local Installer that packages all required dependencies, so that n
- [Mac (Universal)](https://app.cortexcpp.com/download/latest/mac-universal-local)
- [Linux](https://app.cortexcpp.com/download/latest/linux-amd64-local)
-## Start Cortex.cpp Processes and API Server
+## Start Cortex.cpp API Server
This command starts the Cortex.cpp API server at `localhost:39281`.
@@ -35,17 +35,38 @@ This command starts the Cortex.cpp API server at `localhost:39281`.
+## Pull a Model & Select Quantization
+This command allows users to download a model from these Model Hubs:
+- [Cortex Built-in Models](https://cortex.so/models)
+- [Hugging Face](https://huggingface.co) (GGUF): `cortex pull `
+
+It displays the available quantizations, recommends a default, and downloads the quantization you select.
+
+
+ ```sh
+ $ cortex pull llama3.2
+ $ cortex pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
+ ```
+
+
+ ```sh
+ $ cortex.exe pull llama3.2
+ $ cortex.exe pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
+ ```
+
+
+
## Run a Model
This command downloads the default `gguf` model from the [Cortex Hub](https://huggingface.co/cortexso), starts the model, and lets you chat with it.
```sh
- cortex run mistral
+ cortex run llama3.2
```
```sh
- cortex.exe run mistral
+ cortex.exe run llama3.2
```
@@ -78,33 +99,49 @@ curl http://localhost:39281/v1/chat/completions \
"top_p": 1
}'
```
+Refer to our [API documentation](https://cortex.so/api-reference) for more details.
+
+## Show the System State
+This command displays the running model and the hardware system status (RAM, Engine, VRAM, Uptime).
+
+
+ ```sh
+ cortex ps
+ ```
+
+
+ ```sh
+ cortex.exe ps
+ ```
+
+
## Stop a Model
This command stops the running model.
```sh
- cortex models stop mistral
+ cortex models stop llama3.2
```
```sh
- cortex.exe models stop mistral
+ cortex.exe models stop llama3.2
```
-## Show the System State
-This command displays the running model and the hardware system status.
+## Stop Cortex.cpp API Server
+This command stops the Cortex.cpp API server running at `localhost:39281`.
```sh
- cortex ps
+ cortex stop
```
```sh
- cortex.exe ps
+ cortex.exe stop
```
@@ -137,4 +174,3 @@ Now that Cortex.cpp is set up, here are the next steps to explore:
1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/basic-usage/cortexrc) file.
2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data.
3. Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp.
-4. Integrate Cortex.cpp [libraries](/docs/category/libraries) seamlessly into your Python or JavaScript applications.
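Taken together, the commands this patch documents form a single end-to-end session. A sketch of that flow on macOS/Linux (Windows users substitute `cortex.exe`); every command here appears in the quickstart above:

```sh
# Sketch of the full quickstart flow, using only commands documented above.
cortex start                    # start the API server at localhost:39281
cortex pull llama3.2            # download the model; a quantization menu is shown
cortex run llama3.2             # start the model and chat with it
cortex ps                       # show the running model and hardware status
cortex models stop llama3.2     # stop the model
cortex stop                     # stop the API server
```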
From 0a56d50be916a160c213f19cd0c4b59b27b47906 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong
Date: Wed, 30 Oct 2024 19:46:25 +0800
Subject: [PATCH 09/10] Overview - models
---
docs/docs/overview.mdx | 104 ++++++++++++++++++++++++++---------------
1 file changed, 67 insertions(+), 37 deletions(-)
diff --git a/docs/docs/overview.mdx b/docs/docs/overview.mdx
index 25be18b72..89463070c 100644
--- a/docs/docs/overview.mdx
+++ b/docs/docs/overview.mdx
@@ -10,39 +10,82 @@ import TabItem from "@theme/TabItem";
# Cortex
-:::warning
-🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+:::info
+**Real-world Use**: Cortex.cpp powers [Jan](https://jan.ai), our on-device ChatGPT-alternative.
+
+Cortex.cpp is in active development. If you have any questions, please reach out to us on [GitHub](https://github.com/janhq/cortex.cpp/issues/new/choose)
+or [Discord](https://discord.com/invite/FTk2MvZwJH).
:::
![Cortex Cover Image](/img/social-card.jpg)
-Cortex.cpp lets you run AI easily on your computer.
-
-Cortex.cpp is a C++ command-line interface (CLI) designed as an alternative to Ollama. By default, it runs on the `llama.cpp` engine but also supports other engines, including `ONNX` and `TensorRT-LLM`, making it a multi-engine platform.
+Cortex is a Local AI API Platform that is used to run and customize LLMs.
-## Supported Accelerators
-- Nvidia CUDA
-- Apple Metal
-- Qualcomm AI Engine
+Key Features:
+- Straightforward CLI (inspired by Ollama)
+- Full C++ implementation, packageable into Desktop and Mobile apps
+- Pull from Huggingface, or Cortex Built-in Model Library
+- Models stored in universal file formats (vs blobs)
+- Swappable Inference Backends (default: [`llamacpp`](https://github.com/janhq/cortex.llamacpp), future: [`ONNXRuntime`](https://github.com/janhq/cortex.onnx), [`TensorRT-LLM`](https://github.com/janhq/cortex.tensorrt-llm))
+- Cortex can be deployed as a standalone API server, or integrated into apps like [Jan.ai](https://jan.ai/)
-## Supported Inference Backends
-- [llama.cpp](https://github.com/ggerganov/llama.cpp): cross-platform, supports most laptops, desktops and OSes
-- [ONNX Runtime](https://github.com/microsoft/onnxruntime): supports Windows Copilot+ PCs & NPUs
-- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM): supports Nvidia GPUs
-
-If GPU hardware is available, Cortex is GPU accelerated by default.
-
-:::info
-**Real-world Use**: Cortex.cpp powers [Jan](https://jan.ai), our on-device ChatGPT-alternative.
+Cortex's roadmap is to implement the full OpenAI API including Tools, Runs, Multi-modal and Realtime APIs.
-Cortex.cpp has been battle-tested across 1 million+ downloads and handles a variety of hardware configurations.
-:::
-## Supported Models
+## Inference Backends
+- Default: [llama.cpp](https://github.com/ggerganov/llama.cpp): cross-platform, supports most laptops, desktops and OSes
+- Future: [ONNX Runtime](https://github.com/microsoft/onnxruntime): supports Windows Copilot+ PCs & NPUs
+- Future: [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM): supports Nvidia GPUs
-Cortex.cpp supports the following list of [Built-in Models](/models):
+If GPU hardware is available, Cortex is GPU accelerated by default.
-
+## Models
+Cortex.cpp allows users to pull models from multiple Model Hubs, offering flexibility and extensive model access.
+- [Hugging Face](https://huggingface.co)
+- [Cortex Built-in Models](https://cortex.so/models)
+
+> **Note**:
+> As a very general guide: You should have >8 GB of RAM available to run the 7B models, 16 GB to run the 14B models, and 32 GB to run the 32B models.
+
+### Cortex Built-in Models & Quantizations
+| Model /Engine | llama.cpp | Command |
+| -------------- | --------------------- | ----------------------------- |
+| phi-3.5 | ✅ | cortex run phi3.5 |
+| llama3.2 | ✅ | cortex run llama3.2 |
+| llama3.1 | ✅ | cortex run llama3.1 |
+| codestral | ✅ | cortex run codestral |
+| gemma2 | ✅ | cortex run gemma2 |
+| mistral | ✅ | cortex run mistral |
+| ministral | ✅ | cortex run ministral |
+| qwen2.5 | ✅ | cortex run qwen2.5 |
+| openhermes-2.5 | ✅ | cortex run openhermes-2.5 |
+| tinyllama | ✅ | cortex run tinyllama |
+
+View all [Cortex Built-in Models](https://cortex.so/models).
+
+Cortex supports multiple quantizations for each model.
+```
+❯ cortex pull llama3.2
+Downloaded models:
+ llama3.2:3b-gguf-q2-k
+
+Available to download:
+ 1. llama3.2:3b-gguf-q3-kl
+ 2. llama3.2:3b-gguf-q3-km
+ 3. llama3.2:3b-gguf-q3-ks
+ 4. llama3.2:3b-gguf-q4-km (default)
+ 5. llama3.2:3b-gguf-q4-ks
+ 6. llama3.2:3b-gguf-q5-km
+ 7. llama3.2:3b-gguf-q5-ks
+ 8. llama3.2:3b-gguf-q6-k
+ 9. llama3.2:3b-gguf-q8-0
+
+Select a model (1-9):
+```
+
+
+{/*
+
| Model ID | Variant (Branch) | Model size | CLI command |
|------------------|------------------|-------------------|------------------------------------|
@@ -86,17 +129,4 @@ Cortex.cpp supports the following list of [Built-in Models](/models):
| openhermes-2.5 | 7b-tensorrt-llm-linux-ada | 7B | `cortex run openhermes-2.5:7b-tensorrt-llm-linux-ada`|
-
-:::info
-Cortex.cpp supports pulling `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co). Read how to [Pull models from Hugging Face](/docs/hub/hugging-face/)
-:::
-
-## Cortex.cpp Versions
-Cortex.cpp offers three different versions of the app, each serving a unique purpose:
-- **Stable**: The official release version of Cortex.cpp, designed for general use with proven stability.
-- **Beta**: This version includes upcoming features still in testing, allowing users to try new functionality before the next official release.
-- **Nightly**: Automatically built every night, this version includes the latest updates and changes from the engineering team but may be unstable.
-
-:::info
-Each of these versions has a different CLI prefix command.
-:::
\ No newline at end of file
+ */}
\ No newline at end of file
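The overview's Models section distinguishes pulling a Cortex Built-in Model from pulling a GGUF repository on Hugging Face. A brief sketch, reusing model names that appear earlier in this series:

```sh
# Built-in model: Cortex resolves the name against its model hub and
# offers the quantization menu shown in the overview above.
cortex pull llama3.2

# Hugging Face: pull a GGUF model directly by repository name.
cortex pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
```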
From 2c44b1e8e46d5e29bce6c47c99e57cfd055bdcd8 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong
Date: Wed, 30 Oct 2024 19:46:55 +0800
Subject: [PATCH 10/10] chore: readme
---
README.md | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 393d427e6..546622bbc 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ Cortex is a Local AI API Platform that is used to run and customize LLMs.
Key Features:
- Straightforward CLI (inspired by Ollama)
- Full C++ implementation, packageable into Desktop and Mobile apps
-- Pull from Huggingface of Cortex Built-in Model Library
+- Pull from Huggingface, or Cortex Built-in Models
- Models stored in universal file formats (vs blobs)
- Swappable Engines (default: [`llamacpp`](https://github.com/janhq/cortex.llamacpp), future: [`ONNXRuntime`](https://github.com/janhq/cortex.onnx), [`TensorRT-LLM`](https://github.com/janhq/cortex.tensorrt-llm))
- Cortex can be deployed as a standalone API server, or integrated into apps like [Jan.ai](https://jan.ai/)
@@ -88,14 +88,14 @@ Refer to our [Quickstart](https://cortex.so/docs/quickstart/) and
### API:
Cortex.cpp includes a REST API accessible at `localhost:39281`.
-Refer to our [API documentation](https://cortex.so/api-reference) for more details
+Refer to our [API documentation](https://cortex.so/api-reference) for more details.
-## Models & Quantizations
+## Models
Cortex.cpp allows users to pull models from multiple Model Hubs, offering flexibility and extensive model access.
Currently Cortex supports pulling from:
-- Hugging Face: GGUF models eg `author/Model-GGUF`
+- [Hugging Face](https://huggingface.co): GGUF models, e.g. `author/Model-GGUF`
- Cortex Built-in Models
Once downloaded, the model `.gguf` and `model.yml` files are stored in `~\cortexcpp\models`.
@@ -103,7 +103,7 @@ Once downloaded, the model `.gguf` and `model.yml` files are stored in `~\cortex
> **Note**:
> You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 14B models, and 32 GB to run the 32B models.
-### Cortex Model Hub & Quantizations
+### Cortex Built-in Models & Quantizations
| Model /Engine | llama.cpp | Command |
| -------------- | --------------------- | ----------------------------- |