From 616fbe81c6ca2fadba9aa6022deabe8b984bebd1 Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Tue, 29 Oct 2024 17:29:39 +0800 Subject: [PATCH 01/10] chore: bug report template --- .github/ISSUE_TEMPLATE/bug_report.yml | 35 +++++++++++++++++++++------ 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index d0182a83a..2306aaf2f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -9,14 +9,14 @@ body: required: true attributes: label: "Cortex version" - description: "**Tip:** The version is in the app's bottom right corner" - + description: "**Tip:** `cortex -v` outputs the version number" + - type: textarea validations: required: true attributes: - label: "Describe the Bug" - description: "A clear & concise description of the bug" + label: "Describe the issue and expected behaviour" + description: "A clear & concise description of the issue encountered" - type: textarea attributes: @@ -28,18 +28,28 @@ body: 2. Click on '...' - type: textarea + validations: + required: true attributes: label: "Screenshots / Logs" description: | - You can find logs in: ~/cortex/logs + You can find cortex-cli.log and cortex.log files in: ~/cortex/logs/ - type: checkboxes attributes: label: "What is your OS?" options: - - label: MacOS - label: Windows - - label: Linux + - label: Mac Silicon + - label: Mac Intel + - label: Linux / Ubuntu + + - type: input + validations: + required: true + attributes: + label: "What OS Version are you running?" + description: "Eg Windows 10, Ubuntu 22, Mac M1" - type: checkboxes attributes: @@ -47,4 +57,13 @@ body: options: - label: cortex.llamacpp (default) - label: cortex.tensorrt-llm (Nvidia GPUs) - - label: cortex.onnx (NPUs, DirectML) \ No newline at end of file + - label: cortex.onnx (NPUs, DirectML) + + - type: input + validations: + required: true + attributes: + label: "Hardware Specs eg OS version, GPU" + description: + + \ No newline at end of file From 4c3803ab82914f8c07e78797be488d5099ded227 Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Tue, 29 Oct 2024 17:29:58 +0800 Subject: [PATCH 02/10] chore: bug report template --- .github/ISSUE_TEMPLATE/bug_report.yml | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 2306aaf2f..6d4cae367 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -28,12 +28,10 @@ body: 2. Click on '...' - type: textarea - validations: - required: true attributes: label: "Screenshots / Logs" description: | - You can find cortex-cli.log and cortex.log files in: ~/cortex/logs/ + Please include cortex-cli.log and cortex.log files in: ~/cortex/logs/ - type: checkboxes attributes: @@ -44,13 +42,6 @@ body: - label: Mac Intel - label: Linux / Ubuntu - - type: input - validations: - required: true - attributes: - label: "What OS Version are you running?" - description: "Eg Windows 10, Ubuntu 22, Mac M1" - - type: checkboxes attributes: label: "What engine are you running?" 
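Before the docs-facing patches below, note how a reporter would gather what the revised template asks for. A minimal sketch for Mac/Linux, using only the `cortex -v` tip and the log paths named in the template above (the `tail` length is arbitrary, and the Windows log location is not specified here):

```sh
# Version string for the "Cortex version" field
cortex -v

# Tail the two log files the template points reporters to
tail -n 100 ~/cortex/logs/cortex-cli.log
tail -n 100 ~/cortex/logs/cortex.log
```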
From 6c532829cf6bdc14fba9dc9e8b117a18fad379da Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Tue, 29 Oct 2024 19:37:32 +0800 Subject: [PATCH 03/10] chore: remove embeddings from sidebar --- docs/sidebars.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 09f2fb298..d5d7fd020 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -186,7 +186,7 @@ const sidebars: SidebarsConfig = { { type: "doc", id: "cli/cortex", label: "cortex" }, { type: "doc", id: "cli/start", label: "cortex start" }, { type: "doc", id: "cli/chat", label: "cortex chat" }, - { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, + // { type: "doc", id: "cli/embeddings", label: "cortex embeddings" }, // { type: "doc", id: "cli/presets", label: "cortex presets" }, { type: "doc", id: "cli/pull", label: "cortex pull" }, { type: "doc", id: "cli/run", label: "cortex run" }, From 2e66496413bb1b332ac4d84290a6df8bd99f670c Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Wed, 30 Oct 2024 18:27:46 +0800 Subject: [PATCH 04/10] models page: remove onnx trt toggle --- docs/src/pages/models.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/pages/models.tsx b/docs/src/pages/models.tsx index c9937c8b0..cb0e4c9e0 100644 --- a/docs/src/pages/models.tsx +++ b/docs/src/pages/models.tsx @@ -135,7 +135,7 @@ const ModelsPage = () => { Cortex has a built-in model collection of popular models.

-          <Tabs defaultValue="gguf" onValueChange={(value) => handleChange(value)}>
+          {/* <Tabs defaultValue="gguf" onValueChange={(value) => handleChange(value)}>
@@ -150,7 +150,7 @@ const ModelsPage = () => {
               ONNX
-            </Tabs>
+            </Tabs> */}
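With the ONNX and TensorRT-LLM toggle commented out, GGUF is the only format the models page surfaces. For reference, the two pull paths that remain are the ones documented later in this series; a sketch (the Hugging Face repo ID is simply the example reused from the quickstart patch):

```sh
# Built-in model hub (cortex.so/models)
cortex pull llama3.2

# Any GGUF model repo on Hugging Face
cortex pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
```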
From b600af958f514ac4d72c661ef3d14d7b6ee078b4 Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Wed, 30 Oct 2024 18:28:26 +0800 Subject: [PATCH 05/10] hero logo: remove onnx trt, add model variants --- .../Homepage/SimpleHeroSection/index.tsx | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/docs/src/containers/Homepage/SimpleHeroSection/index.tsx b/docs/src/containers/Homepage/SimpleHeroSection/index.tsx index 5ee2cd6ec..ee0584d08 100644 --- a/docs/src/containers/Homepage/SimpleHeroSection/index.tsx +++ b/docs/src/containers/Homepage/SimpleHeroSection/index.tsx @@ -52,11 +52,24 @@ const SimpleHeroSection = () => {

             # Run Local LLMs
-            cortex 
-            run 
-            llama3.1
+            cortex run
+            llama3.2
+
+            Available to download:
+                1. llama3.2:3b-gguf-q2-k
+                2. llama3.2:3b-gguf-q3-kl
+                3. llama3.2:3b-gguf-q3-km
+                4. llama3.2:3b-gguf-q3-ks
+                5. llama3.2:3b-gguf-q4-km (default)
+                6. llama3.2:3b-gguf-q4-ks
+                7. llama3.2:3b-gguf-q5-km
+                8. llama3.2:3b-gguf-q5-ks
+                9. llama3.2:3b-gguf-q6-k
+                10. llama3.2:3b-gguf-q8-0
+
+            Select a model (1-10): 5
+            {/*
             cortex 
             run 
             llama3.1:tensorrt-llm
             cortex 
             run 
             llama3.1:onnx
+            */}
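The mock terminal above ends with the user choosing option 5, i.e. `llama3.2:3b-gguf-q4-km`. Running a quantization directly by its variant ID, shown below, is an assumption: it mirrors the `model:variant` form (`llama3.1:onnx`, `llama3.1:tensorrt-llm`) used by the code this patch comments out, and skips the interactive picker.

```sh
# Assumed variant-qualified form, mirroring llama3.1:onnx above
cortex run llama3.2:3b-gguf-q4-km
```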
From 280c94bf7170fe96cd24b6ceacd32a6731fb5490 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong <gabrielle@jan.ai>
Date: Wed, 30 Oct 2024 18:58:10 +0800
Subject: [PATCH 06/10] Remove discord links

---
 docs/docs/architecture.mdx    | 4 ----
 docs/docs/troubleshooting.mdx | 6 +-----
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/docs/docs/architecture.mdx b/docs/docs/architecture.mdx
index f2cd598c2..852d7b7bc 100644
--- a/docs/docs/architecture.mdx
+++ b/docs/docs/architecture.mdx
@@ -148,7 +148,3 @@ Our development roadmap outlines key features and epics we will focus on in the
 - **RAG**: Improve response quality and contextual relevance in our AI models.
 - **Cortex Python Runtime**: Provide a scalable Python execution environment for Cortex.
 
-
-:::info
-For a full list of Cortex development roadmap, please see [here](https://discord.com/channels/1107178041848909847/1230770299730001941).
-:::
diff --git a/docs/docs/troubleshooting.mdx b/docs/docs/troubleshooting.mdx
index e50ab115b..34831e10a 100644
--- a/docs/docs/troubleshooting.mdx
+++ b/docs/docs/troubleshooting.mdx
@@ -152,8 +152,4 @@ lsof -i :1337
 kill -9 [pid]
 ```
 
-
-
-:::info
-For additional issues not listed above, please contact us on [Discord](https://discord.com/channels/1107178041848909847/1267305972733444147) or submit a [GitHub issue](https://github.com/janhq/cortex/issues). Include your `~/cortex/cortex.log` or an error snippet.
-:::
+ 
\ No newline at end of file

From 9b7eb44523003d83b26c75d5821b9b917d1f4379 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong <gabrielle@jan.ai>
Date: Wed, 30 Oct 2024 19:06:21 +0800
Subject: [PATCH 07/10] Remove beta/nightly/onnx/trt from quickstart

---
 docs/docs/quickstart.mdx | 164 +++++----------------------------------
 1 file changed, 18 insertions(+), 146 deletions(-)

diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx
index 0dea487ec..c0ba8e2e9 100644
--- a/docs/docs/quickstart.mdx
+++ b/docs/docs/quickstart.mdx
@@ -8,76 +8,51 @@ import Tabs from "@theme/Tabs";
 
 import TabItem from "@theme/TabItem";
 
-:::warning
-🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase.
+:::info
+Cortex.cpp is in active development. If you have any questions, please reach out to us:
+- [GitHub](https://github.com/janhq/cortex.cpp/issues/new/choose)
+- [Discord](https://discord.com/invite/FTk2MvZwJH)
 :::
 
-## Installation
-To install Cortex, download the installer for your operating system from the following options:
-- **Stable Version**
-  - [Windows](https://github.com/janhq/cortex.cpp/releases)
-  - [Mac](https://github.com/janhq/cortex.cpp/releases)
-  - [Linux (Debian)](https://github.com/janhq/cortex.cpp/releases)
-  - [Linux (Fedora)](https://github.com/janhq/cortex.cpp/releases)
+## Local Installation
+Cortex has a Local Installer that packages all required dependencies, so that no internet connection is required during the installation process.
+  - [Windows](https://app.cortexcpp.com/download/latest/windows-amd64-local)
+  - [Mac (Universal)](https://app.cortexcpp.com/download/latest/mac-universal-local)
+  - [Linux](https://app.cortexcpp.com/download/latest/linux-amd64-local)
+
 ## Start Cortex.cpp Processes and API Server
 This command starts the Cortex.cpp API server at `localhost:39281`.
 
 <Tabs>
   <TabItem value="mac" label="Mac/Linux">
```sh - # Stable cortex start - - # Beta - cortex-beta start - - # Nightly - cortex-nightly start ``` ```sh - # Stable cortex.exe start - - # Beta - cortex-beta.exe start - - # Nightly - cortex-nightly.exe start ``` + ## Run a Model This command downloads the default `gguf` model format from the [Cortex Hub](https://huggingface.co/cortexso), starts the model, and chat with the model. ```sh - # Stable cortex run mistral - - # Beta - cortex-beta run mistral - - # Nightly - cortex-nightly run mistral ``` ```sh - # Stable cortex.exe run mistral - - # Beta - cortex-beta.exe run mistral - - # Nightly - cortex-nightly.exe run mistral ``` :::info -All model files are stored in the `~users/cortex/models` folder. +All model files are stored in the `~/cortex/models` folder. ::: + ## Using the Model ### API ```curl @@ -103,153 +78,58 @@ curl http://localhost:39281/v1/chat/completions \ "top_p": 1 }' ``` -### Cortex.js -```js -const resp = await cortex.chat.completions.create({ - model: "mistral", - messages: [ - { role: "system", content: "You are a chatbot." }, - { role: "user", content: "What is the capital of the United States?" }, - ], - }); -``` -### Cortex.py -```py -completion = client.chat.completions.create( - model=mistral, - messages=[ - { - "role": "user", - "content": "Say this is a test", - }, - ], -) -``` + ## Stop a Model This command stops the running model. ```sh - # Stable cortex models stop mistral - - # Beta - cortex-beta models stop mistral - - # Nightly - cortex-nightly models stop mistral ``` ```sh - # Stable cortex.exe models stop mistral - - # Beta - cortex-beta.exe models stop mistral - - # Nightly - cortex-nightly.exe models stop mistral ``` + ## Show the System State This command displays the running model and the hardware system status. ```sh - # Stable cortex ps - - # Beta - cortex-beta ps - - # Nightly - cortex-nightly ps ``` ```sh - # Stable cortex.exe ps - - # Beta - cortex-beta.exe ps - - # Nightly - cortex-nightly.exe ps ``` -## Run Different Model Variants + + ## What's Next? Now that Cortex.cpp is set up, here are the next steps to explore: @@ -258,11 +138,3 @@ Now that Cortex.cpp is set up, here are the next steps to explore: 2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data. 3. Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp. 4. Integrate Cortex.cpp [libraries](/docs/category/libraries) seamlessly into your Python or JavaScript applications. 
-
-
-:::info
-Cortex.cpp is still in early development, so if you have any questions, please reach out to us:
-
-- [GitHub](https://github.com/janhq/cortex)
-- [Discord](https://discord.gg/YFKKeuVu)
-  :::

From e6a5548c18cb3da2e00379f5a1bff2255cc3c65b Mon Sep 17 00:00:00 2001
From: Gabrielle Ong <gabrielle@jan.ai>
Date: Wed, 30 Oct 2024 19:46:12 +0800
Subject: [PATCH 08/10] Quickstart additional commands

---
 docs/docs/quickstart.mdx | 56 +++++++++++++++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 10 deletions(-)

diff --git a/docs/docs/quickstart.mdx b/docs/docs/quickstart.mdx
index c0ba8e2e9..687707c66 100644
--- a/docs/docs/quickstart.mdx
+++ b/docs/docs/quickstart.mdx
@@ -20,7 +20,7 @@ Cortex has a Local Installer that packages all required dependencies, so that no
   - [Mac (Universal)](https://app.cortexcpp.com/download/latest/mac-universal-local)
   - [Linux](https://app.cortexcpp.com/download/latest/linux-amd64-local)
 
-## Start Cortex.cpp Processes and API Server
+## Start Cortex.cpp API Server
 This command starts the Cortex.cpp API server at `localhost:39281`.
 
 <Tabs>
@@ -35,17 +35,38 @@ This command starts the Cortex.cpp API server at `localhost:39281`.
   </TabItem>
 </Tabs>
 
+## Pull a Model & Select Quantization
+This command allows users to download a model from these Model Hubs:
+- [Cortex Built-in Models](https://cortex.so/models)
+- [Hugging Face](https://huggingface.co) (GGUF): `cortex pull <author/ModelRepo>`
+
+It displays available quantizations, recommends a default, and downloads the desired quantization.
+
+<Tabs>
+  <TabItem value="mac" label="Mac/Linux">
+  ```sh
+  $ cortex pull llama3.2
+  $ cortex pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
+  ```
+  </TabItem>
+  <TabItem  value="windows" label="Windows">
+  ```sh
+  $ cortex.exe pull llama3.2
+  $ cortex.exe pull bartowski/Meta-Llama-3.1-8B-Instruct-GGUF
+  ```
+  </TabItem>
+</Tabs>
+
 ## Run a Model
 This command downloads the default `gguf` model format from the [Cortex Hub](https://huggingface.co/cortexso), starts the model, and chat with the model.
 <Tabs>
   <TabItem value="mac" label="Mac/Linux">
   ```sh
-  cortex run mistral
+  cortex run llama3.2
   ```
   </TabItem>
   <TabItem  value="windows" label="Windows">
   ```sh
-  cortex.exe run mistral
+  cortex.exe run llama3.2
   ```
   </TabItem>
 </Tabs>
@@ -78,33 +99,49 @@ curl http://localhost:39281/v1/chat/completions \
   "top_p": 1
 }'
 ```
+Refer to our [API documentation](https://cortex.so/api-reference) for more details.
+
+## Show the System State
+This command displays the running model and the hardware system status (RAM, Engine, VRAM, Uptime).
+
+<Tabs>
+  <TabItem value="mac" label="Mac/Linux">
+  ```sh
+  cortex ps
+  ```
+  </TabItem>
+  <TabItem  value="windows" label="Windows">
+  ```sh
+  cortex.exe ps
+  ```
+  </TabItem>
+</Tabs>
 
 ## Stop a Model
 This command stops the running model.
 <Tabs>
   <TabItem value="mac" label="Mac/Linux">
   ```sh
-  cortex models stop mistral
+  cortex models stop llama3.2
   ```
   </TabItem>
   <TabItem  value="windows" label="Windows">
   ```sh
-  cortex.exe models stop mistral
+  cortex.exe models stop llama3.2
   ```
   </TabItem>
 </Tabs>
 
-## Show the System State
-This command displays the running model and the hardware system status.
+## Stop Cortex.cpp API Server
+This command stops the Cortex.cpp API server running at `localhost:39281`.
 <Tabs>
   <TabItem value="mac" label="Mac/Linux">
   ```sh
-  cortex ps
+  cortex stop
   ```
   </TabItem>
   <TabItem  value="windows" label="Windows">
   ```sh
-  cortex.exe ps
+  cortex.exe stop
   ```
   </TabItem>
 </Tabs>
@@ -137,4 +174,3 @@ Now that Cortex.cpp is set up, here are the next steps to explore:
 1. Adjust the folder path and configuration using the [`.cortexrc`](/docs/basic-usage/cortexrc) file.
 2. Explore the Cortex.cpp [data folder](/docs/data-folder) to understand how it stores data.
 3. Learn about the structure of the [`model.yaml`](/docs/model-yaml) file in Cortex.cpp.
From 0a56d50be916a160c213f19cd0c4b59b27b47906 Mon Sep 17 00:00:00 2001 From: Gabrielle Ong Date: Wed, 30 Oct 2024 19:46:25 +0800 Subject: [PATCH 09/10] Overview - models --- docs/docs/overview.mdx | 104 ++++++++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 37 deletions(-) diff --git a/docs/docs/overview.mdx b/docs/docs/overview.mdx index 25be18b72..89463070c 100644 --- a/docs/docs/overview.mdx +++ b/docs/docs/overview.mdx @@ -10,39 +10,82 @@ import TabItem from "@theme/TabItem"; # Cortex -:::warning -🚧 Cortex.cpp is currently under development. Our documentation outlines the intended behavior of Cortex, which may not yet be fully implemented in the codebase. +:::info +**Real-world Use**: Cortex.cpp powers [Jan](https://jan.ai), our on-device ChatGPT-alternative. + +Cortex.cpp is in active development. If you have any questions, please reach out to us on [GitHub](https://github.com/janhq/cortex.cpp/issues/new/choose) +or [Discord](https://discord.com/invite/FTk2MvZwJH) ::: ![Cortex Cover Image](/img/social-card.jpg) -Cortex.cpp lets you run AI easily on your computer. - -Cortex.cpp is a C++ command-line interface (CLI) designed as an alternative to Ollama. By default, it runs on the `llama.cpp` engine but also supports other engines, including `ONNX` and `TensorRT-LLM`, making it a multi-engine platform. +Cortex is a Local AI API Platform that is used to run and customize LLMs. -## Supported Accelerators -- Nvidia CUDA -- Apple Metal -- Qualcomm AI Engine +Key Features: +- Straightforward CLI (inspired by Ollama) +- Full C++ implementation, packageable into Desktop and Mobile apps +- Pull from Huggingface, or Cortex Built-in Model Library +- Models stored in universal file formats (vs blobs) +- Swappable Inference Backends (default: [`llamacpp`](https://github.com/janhq/cortex.llamacpp), future: [`ONNXRuntime`](https://github.com/janhq/cortex.onnx), [`TensorRT-LLM`](https://github.com/janhq/cortex.tensorrt-llm)) +- Cortex can be deployed as a standalone API server, or integrated into apps like [Jan.ai](https://jan.ai/) -## Supported Inference Backends -- [llama.cpp](https://github.com/ggerganov/llama.cpp): cross-platform, supports most laptops, desktops and OSes -- [ONNX Runtime](https://github.com/microsoft/onnxruntime): supports Windows Copilot+ PCs & NPUs -- [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM): supports Nvidia GPUs - -If GPU hardware is available, Cortex is GPU accelerated by default. - -:::info -**Real-world Use**: Cortex.cpp powers [Jan](https://jan.ai), our on-device ChatGPT-alternative. +Cortex's roadmap is to implement the full OpenAI API including Tools, Runs, Multi-modal and Realtime APIs. -Cortex.cpp has been battle-tested across 1 million+ downloads and handles a variety of hardware configurations. -::: -## Supported Models +## Inference Backends +- Default: [llama.cpp](https://github.com/ggerganov/llama.cpp): cross-platform, supports most laptops, desktops and OSes +- Future: [ONNX Runtime](https://github.com/microsoft/onnxruntime): supports Windows Copilot+ PCs & NPUs +- Future: [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM): supports Nvidia GPUs -Cortex.cpp supports the following list of [Built-in Models](/models): +If GPU hardware is available, Cortex is GPU accelerated by default. - +## Models +Cortex.cpp allows users to pull models from multiple Model Hubs, offering flexibility and extensive model access. 
+
+- [Hugging Face](https://huggingface.co)
+- [Cortex Built-in Models](https://cortex.so/models)
+
+> **Note**:
+> As a very general guide: You should have >8 GB of RAM available to run the 7B models, 16 GB to run the 14B models, and 32 GB to run the 32B models.
+
+### Cortex Built-in Models & Quantizations
+| Model /Engine | llama.cpp | Command |
+| -------------- | --------------------- | ----------------------------- |
+| phi-3.5 | ✅ | cortex run phi3.5 |
+| llama3.2 | ✅ | cortex run llama3.2 |
+| llama3.1 | ✅ | cortex run llama3.1 |
+| codestral | ✅ | cortex run codestral |
+| gemma2 | ✅ | cortex run gemma2 |
+| mistral | ✅ | cortex run mistral |
+| ministral | ✅ | cortex run ministral |
+| qwen2.5 | ✅ | cortex run qwen2.5 |
+| openhermes-2.5 | ✅ | cortex run openhermes-2.5 |
+| tinyllama | ✅ | cortex run tinyllama |
+
+View all [Cortex Built-in Models](https://cortex.so/models).
+
+Cortex supports multiple quantizations for each model.
+```
+❯ cortex-nightly pull llama3.2
+Downloaded models:
+    llama3.2:3b-gguf-q2-k
+
+Available to download:
+    1. llama3.2:3b-gguf-q3-kl
+    2. llama3.2:3b-gguf-q3-km
+    3. llama3.2:3b-gguf-q3-ks
+    4. llama3.2:3b-gguf-q4-km (default)
+    5. llama3.2:3b-gguf-q4-ks
+    6. llama3.2:3b-gguf-q5-km
+    7. llama3.2:3b-gguf-q5-ks
+    8. llama3.2:3b-gguf-q6-k
+    9. llama3.2:3b-gguf-q8-0
+
+Select a model (1-9):
+```
+
+
+{/*
+
 | Model ID | Variant (Branch) | Model size | CLI command |
 |------------------|------------------|-------------------|------------------------------------|
 
 | openhermes-2.5 | 7b-tensorrt-llm-linux-ada | 7B | `cortex run openhermes-2.5:7b-tensorrt-llm-linux-ada`|
 
 
-
-:::info
-Cortex.cpp supports pulling `GGUF` and `ONNX` models from the [Hugging Face Hub](https://huggingface.co). Read how to [Pull models from Hugging Face](/docs/hub/hugging-face/)
-:::
-
-## Cortex.cpp Versions
-Cortex.cpp offers three different versions of the app, each serving a unique purpose:
-- **Stable**: The official release version of Cortex.cpp, designed for general use with proven stability.
-- **Beta**: This version includes upcoming features still in testing, allowing users to try new functionality before the next official release.
-- **Nightly**: Automatically built every night, this version includes the latest updates and changes from the engineering team but may be unstable.
-
-:::info
-Each of these versions has a different CLI prefix command.
-:::
\ No newline at end of file
+ */}
\ No newline at end of file

From 2c44b1e8e46d5e29bce6c47c99e57cfd055bdcd8 Mon Sep 17 00:00:00 2001
From: Gabrielle Ong <gabrielle@jan.ai>
Date: Wed, 30 Oct 2024 19:46:55 +0800
Subject: [PATCH 10/10] chore: readme

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 393d427e6..546622bbc 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ Cortex is a Local AI API Platform that is used to run and customize LLMs.
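Read together with the quickstart patch, the overview's interactive pull slots into a three-step flow. Every command below appears verbatim elsewhere in this series; only the combination is illustrative:

```sh
# Pull and answer the quantization prompt (q4-km is the default)
cortex pull llama3.2

# Start the model, then confirm it is loaded and check RAM/VRAM usage
cortex run llama3.2
cortex ps
```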
Key Features: - Straightforward CLI (inspired by Ollama) - Full C++ implementation, packageable into Desktop and Mobile apps -- Pull from Huggingface of Cortex Built-in Model Library +- Pull from Huggingface, or Cortex Built-in Models - Models stored in universal file formats (vs blobs) - Swappable Engines (default: [`llamacpp`](https://github.com/janhq/cortex.llamacpp), future: [`ONNXRuntime`](https://github.com/janhq/cortex.onnx), [`TensorRT-LLM`](https://github.com/janhq/cortex.tensorrt-llm)) - Cortex can be deployed as a standalone API server, or integrated into apps like [Jan.ai](https://jan.ai/) @@ -88,14 +88,14 @@ Refer to our [Quickstart](https://cortex.so/docs/quickstart/) and ### API: Cortex.cpp includes a REST API accessible at `localhost:39281`. -Refer to our [API documentation](https://cortex.so/api-reference) for more details +Refer to our [API documentation](https://cortex.so/api-reference) for more details. -## Models & Quantizations +## Models Cortex.cpp allows users to pull models from multiple Model Hubs, offering flexibility and extensive model access. Currently Cortex supports pulling from: -- Hugging Face: GGUF models eg `author/Model-GGUF` +- [Hugging Face](https://huggingface.co): GGUF models eg `author/Model-GGUF` - Cortex Built-in Models Once downloaded, the model `.gguf` and `model.yml` files are stored in `~\cortexcpp\models`. @@ -103,7 +103,7 @@ Once downloaded, the model `.gguf` and `model.yml` files are stored in `~\cortex > **Note**: > You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 14B models, and 32 GB to run the 32B models. -### Cortex Model Hub & Quantizations +### Cortex Built-in Models & Quantizations | Model /Engine | llama.cpp | Command | | -------------- | --------------------- | ----------------------------- |