From 166cdb5c6da4cf684d5ffe78defbb576330e8b2d Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 1 Nov 2024 09:12:05 +0700
Subject: [PATCH 1/7] fix: use download event type to listen ws on client side
 (#1601)

* fix: use download event type to listen ws on client side

* fix: format

* fix: remove unused

---------

Co-authored-by: vansangpfiev
---
 engine/cli/commands/engine_install_cmd.cc |  7 +++---
 engine/cli/commands/model_pull_cmd.cc     |  2 +-
 engine/cli/utils/download_progress.cc     | 29 ++++++++++++++++-------
 engine/cli/utils/download_progress.h      |  2 +-
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc
index a0d008c60..f046d89e1 100644
--- a/engine/cli/commands/engine_install_cmd.cc
+++ b/engine/cli/commands/engine_install_cmd.cc
@@ -35,8 +35,9 @@ bool EngineInstallCmd::Exec(const std::string& engine,
   DownloadProgress dp;
   dp.Connect(host_, port_);
   // engine can be small, so need to start ws first
-  auto dp_res = std::async(std::launch::deferred,
-                           [&dp, &engine] { return dp.Handle(engine); });
+  auto dp_res = std::async(std::launch::deferred, [&dp] {
+    return dp.Handle(DownloadType::Engine);
+  });
 
   CLI_LOG("Validating download items, please wait..")
   httplib::Client cli(host_ + ":" + std::to_string(port_));
@@ -68,7 +69,7 @@ bool EngineInstallCmd::Exec(const std::string& engine,
 
   bool check_cuda_download = !system_info_utils::GetCudaVersion().empty();
   if (check_cuda_download) {
-    if (!dp.Handle("cuda"))
+    if (!dp.Handle(DownloadType::CudaToolkit))
       return false;
   }
 
diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc
index 8d6757d61..ad8938146 100644
--- a/engine/cli/commands/model_pull_cmd.cc
+++ b/engine/cli/commands/model_pull_cmd.cc
@@ -149,7 +149,7 @@ std::optional<std::string> ModelPullCmd::Exec(const std::string& host, int port,
       reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
 #endif
   dp.Connect(host, port);
-  if (!dp.Handle(model_id))
+  if (!dp.Handle(DownloadType::Model))
     return std::nullopt;
   if (force_stop)
     return std::nullopt;
diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index e77e43beb..9c38d4bdf 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -4,10 +4,23 @@
 #include "common/event.h"
 #include "indicators/dynamic_progress.hpp"
 #include "indicators/progress_bar.hpp"
+#include "utils/engine_constants.h"
 #include "utils/format_utils.h"
 #include "utils/json_helper.h"
 #include "utils/logging_utils.h"
 
+namespace {
+std::string Repo2Engine(const std::string& r) {
+  if (r == kLlamaRepo) {
+    return kLlamaEngine;
+  } else if (r == kOnnxRepo) {
+    return kOnnxEngine;
+  } else if (r == kTrtLlmRepo) {
+    return kTrtLlmEngine;
+  }
+  return r;
+};
+}  // namespace
 bool DownloadProgress::Connect(const std::string& host, int port) {
   if (ws_) {
     CTL_INF("Already connected!");
@@ -21,7 +34,7 @@ bool DownloadProgress::Connect(const std::string& host, int port) {
   return true;
 }
 
-bool DownloadProgress::Handle(const std::string& id) {
+bool DownloadProgress::Handle(const DownloadType& event_type) {
   assert(!!ws_);
   std::unordered_map<std::string, uint64_t> totals;
   status_ = DownloadStatus::DownloadStarted;
@@ -30,7 +43,7 @@ bool DownloadProgress::Handle(const std::string& id) {
   std::vector<std::unique_ptr<indicators::ProgressBar>> items;
   indicators::show_console_cursor(false);
   auto handle_message = [this, &bars, &items, &totals,
-                         id](const std::string& message) {
+                         event_type](const std::string& message) {
     CTL_INF(message);
 
     auto pad_string = [](const std::string& str,
@@ -50,8 +63,8 @@ bool DownloadProgress::Handle(const std::string& id) {
     auto ev = cortex::event::GetDownloadEventFromJson(
         json_helper::ParseJsonString(message));
 
-    // Ignore other task ids
-    if (ev.download_task_.id != id) {
+    // Ignore other task type
+    if (ev.download_task_.type != event_type) {
       return;
     }
 
@@ -63,7 +76,7 @@ bool DownloadProgress::Handle(const std::string& id) {
             indicators::option::BarWidth{50}, indicators::option::Start{"["},
             indicators::option::Fill{"="}, indicators::option::Lead{">"},
             indicators::option::End{"]"},
-            indicators::option::PrefixText{pad_string(i.id)},
+            indicators::option::PrefixText{pad_string(Repo2Engine(i.id))},
            indicators::option::ForegroundColor{indicators::Color::white},
            indicators::option::ShowRemainingTime{true}));
         bars->push_back(*(items.back()));
@@ -80,7 +93,7 @@ bool DownloadProgress::Handle(const std::string& id) {
       if (ev.type_ == DownloadStatus::DownloadStarted ||
           ev.type_ == DownloadStatus::DownloadUpdated) {
         (*bars)[i].set_option(indicators::option::PrefixText{
-            pad_string(it.id) +
+            pad_string(Repo2Engine(it.id)) +
             std::to_string(
                 int(static_cast<double>(downloaded) / totals[it.id] * 100)) +
             '%'});
@@ -94,8 +107,8 @@ bool DownloadProgress::Handle(const std::string& id) {
         auto total_str = format_utils::BytesToHumanReadable(totals[it.id]);
         (*bars)[i].set_option(
             indicators::option::PostfixText{total_str + "/" + total_str});
-        (*bars)[i].set_option(
-            indicators::option::PrefixText{pad_string(it.id) + "100%"});
+        (*bars)[i].set_option(indicators::option::PrefixText{
+            pad_string(Repo2Engine(it.id)) + "100%"});
         (*bars)[i].set_progress(100);
 
         CTL_INF("Download success");
diff --git a/engine/cli/utils/download_progress.h b/engine/cli/utils/download_progress.h
index 4f71e6d84..98fe85654 100644
--- a/engine/cli/utils/download_progress.h
+++ b/engine/cli/utils/download_progress.h
@@ -10,7 +10,7 @@ class DownloadProgress {
  public:
   bool Connect(const std::string& host, int port);
 
-  bool Handle(const std::string& id);
+  bool Handle(const DownloadType& event_type);
 
   void ForceStop() { force_stop_ = true; }
 

From f5fbad6d5ca9035f44c9a37f230daa5bf8b4d7c0 Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 1 Nov 2024 11:26:18 +0700
Subject: [PATCH 2/7] chore: add model name as a parameter support during
 import via API (#1600)

---
 docs/static/openapi/cortex.json           | 86 +++++++++++++++++++
 engine/controllers/models.cc              | 13 ++-
 engine/e2e-test/test_api_model_import.py  | 24 +++++-
 .../test_api_model_pull_direct_url.py     | 29 +++++--
 engine/services/model_service.cc          | 22 +++--
 engine/services/model_service.h           |  5 +-
 6 files changed, 160 insertions(+), 19 deletions(-)

diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index 0f715456d..8b3acb0e2 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -554,6 +554,46 @@
         "tags": ["Models"]
       }
     },
+    "/v1/models/import": {
+      "post": {
+        "operationId": "ModelsController_importModel",
+        "summary": "Import model",
+        "description": "Imports a model from a specified path.",
+        "requestBody": {
+          "required": true,
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ImportModelRequest"
+              },
+              "example": {
+                "model": "model-id",
+                "modelPath": "/path/to/gguf",
+                "name": "model display name"
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "Model is imported successfully!",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ImportModelResponse"
+                },
+                "example": {
+                  "message": "Model is imported successfully!",
+                  "modelHandle": "model-id",
+                  "result": "OK"
+                }
+              }
+            }
+          }
+        },
+        "tags": ["Models"]
+      }
+    },
     "/v1/threads": {
       "post": {
         "operationId": "ThreadsController_create",
@@ -1660,6 +1700,15 @@
             "value": "my-custom-model-id"
           }
         ]
+      },
+      "name": {
+        "type": "string",
+        "description": "The name which will be used to overwrite the model name.",
+        "examples": [
+          {
+            "value": "my-custom-model-name"
+          }
+        ]
       }
     }
   },
@@ -1803,6 +1852,43 @@
           }
         }
       },
+      "ImportModelRequest": {
+        "type": "object",
+        "properties": {
+          "model": {
+            "type": "string",
+            "description": "The unique identifier of the model."
+          },
+          "modelPath": {
+            "type": "string",
+            "description": "The file path to the model."
+          },
+          "name": {
+            "type": "string",
+            "description": "The display name of the model."
+          }
+        },
+        "required": ["model", "modelPath"]
+      },
+      "ImportModelResponse": {
+        "type": "object",
+        "properties": {
+          "message": {
+            "type": "string",
+            "description": "Success message."
+          },
+          "modelHandle": {
+            "type": "string",
+            "description": "The unique identifier of the imported model."
+          },
+          "result": {
+            "type": "string",
+            "description": "Result status.",
+            "example": "OK"
+          }
+        },
+        "required": ["message", "modelHandle", "result"]
+      },
       "CommonResponseDto": {
         "type": "object",
         "properties": {
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 602c81ab6..939f63f31 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -33,12 +33,19 @@ void Models::PullModel(const HttpRequestPtr& req,
     desired_model_id = id;
   }
 
+  std::optional<std::string> desired_model_name = std::nullopt;
+  auto name_value = (*(req->getJsonObject())).get("name", "").asString();
+
+  if (!name_value.empty()) {
+    desired_model_name = name_value;
+  }
+
   auto handle_model_input =
       [&, model_handle]() -> cpp::result {
     CTL_INF("Handle model input, model handle: " + model_handle);
     if (string_utils::StartsWith(model_handle, "https")) {
-      return model_service_->HandleDownloadUrlAsync(model_handle,
-                                                    desired_model_id);
+      return model_service_->HandleDownloadUrlAsync(
+          model_handle, desired_model_id, desired_model_name);
     } else if (model_handle.find(":") != std::string::npos) {
       auto model_and_branch = string_utils::SplitBy(model_handle, ":");
       return model_service_->DownloadModelFromCortexsoAsync(
@@ -312,6 +319,7 @@ void Models::ImportModel(
   }
   auto modelHandle = (*(req->getJsonObject())).get("model", "").asString();
   auto modelPath = (*(req->getJsonObject())).get("modelPath", "").asString();
+  auto modelName = (*(req->getJsonObject())).get("name", "").asString();
   config::GGUFHandler gguf_handler;
   config::YamlHandler yaml_handler;
   cortex::db::Models modellist_utils_obj;
@@ -333,6 +341,7 @@ void Models::ImportModel(
     config::ModelConfig model_config = gguf_handler.GetModelConfig();
     model_config.files.push_back(modelPath);
     model_config.model = modelHandle;
+    model_config.name = modelName.empty() ?
model_config.name : modelName; yaml_handler.UpdateModelConfig(model_config); if (modellist_utils_obj.AddModelEntry(model_entry).value()) { diff --git a/engine/e2e-test/test_api_model_import.py b/engine/e2e-test/test_api_model_import.py index 8dd34ea7a..3f8a82a0d 100644 --- a/engine/e2e-test/test_api_model_import.py +++ b/engine/e2e-test/test_api_model_import.py @@ -18,5 +18,25 @@ def setup_and_teardown(self): def test_model_import_should_be_success(self): body_json = {'model': 'tinyllama:gguf', 'modelPath': '/path/to/local/gguf'} - response = requests.post("http://localhost:3928/models/import", json = body_json) - assert response.status_code == 200 \ No newline at end of file + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 200 + + @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.") + def test_model_import_with_name_should_be_success(self): + body_json = {'model': 'tinyllama:gguf', + 'modelPath': '/path/to/local/gguf', + 'name': 'test_model'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 200 + + def test_model_import_with_invalid_path_should_fail(self): + body_json = {'model': 'tinyllama:gguf', + 'modelPath': '/invalid/path/to/gguf'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 400 + + def test_model_import_with_missing_model_should_fail(self): + body_json = {'modelPath': '/path/to/local/gguf'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + print(response) + assert response.status_code == 409 \ No newline at end of file diff --git a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py index e93ca2ddd..aa15fbfba 100644 --- a/engine/e2e-test/test_api_model_pull_direct_url.py +++ b/engine/e2e-test/test_api_model_pull_direct_url.py @@ -21,7 +21,7 @@ def setup_and_teardown(self): [ "models", "delete", - "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf", + "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf", ], ) yield @@ -32,7 +32,7 @@ def setup_and_teardown(self): [ "models", "delete", - "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf", + "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf", ], ) stop_server() @@ -40,16 +40,35 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_model_pull_with_direct_url_should_be_success(self): myobj = { - "model": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf" } response = requests.post("http://localhost:3928/models/pull", json=myobj) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) get_model_response = requests.get( - "http://127.0.0.1:3928/models/TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" ) assert get_model_response.status_code == 200 assert ( get_model_response.json()["model"] - == "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + == 
"afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + ) + + @pytest.mark.asyncio + async def test_model_pull_with_direct_url_should_have_desired_name(self): + myobj = { + "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf", + "name": "smol_llama_100m" + } + response = requests.post("http://localhost:3928/models/pull", json=myobj) + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + get_model_response = requests.get( + "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + ) + assert get_model_response.status_code == 200 + print(get_model_response.json()["name"]) + assert ( + get_model_response.json()["name"] + == "smol_llama_100m" ) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index b49df3420..4967b1dd9 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -17,7 +17,8 @@ namespace { void ParseGguf(const DownloadItem& ggufDownloadItem, - std::optional author) { + std::optional author, + std::optional name) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; config::GGUFHandler gguf_handler; @@ -32,6 +33,8 @@ void ParseGguf(const DownloadItem& ggufDownloadItem, fmu::ToRelativeCortexDataPath(fs::path(ggufDownloadItem.localPath)); model_config.files = {file_rel_path.string()}; model_config.model = ggufDownloadItem.id; + model_config.name = + name.has_value() ? name.value() : gguf_handler.GetModelConfig().name; yaml_handler.UpdateModelConfig(model_config); auto yaml_path{ggufDownloadItem.localPath}; @@ -223,7 +226,8 @@ std::optional ModelService::GetDownloadedModel( } cpp::result ModelService::HandleDownloadUrlAsync( - const std::string& url, std::optional temp_model_id) { + const std::string& url, std::optional temp_model_id, + std::optional temp_name) { auto url_obj = url_parser::FromUrlString(url); if (url_obj.host == kHuggingFaceHost) { @@ -279,9 +283,9 @@ cpp::result ModelService::HandleDownloadUrlAsync( .localPath = local_path, }}}}; - auto on_finished = [author](const DownloadTask& finishedTask) { + auto on_finished = [author, temp_name](const DownloadTask& finishedTask) { auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author); + ParseGguf(gguf_download_item, author, temp_name); }; downloadTask.id = unique_model_id; @@ -346,7 +350,7 @@ cpp::result ModelService::HandleUrl( auto on_finished = [author](const DownloadTask& finishedTask) { auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author); + ParseGguf(gguf_download_item, author, std::nullopt); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished); @@ -770,7 +774,7 @@ cpp::result ModelService::GetModelPullInfo( auto author{url_obj.pathParams[0]}; auto model_id{url_obj.pathParams[1]}; auto file_name{url_obj.pathParams.back()}; - if (author == "cortexso") { + if (author == "cortexso") { return ModelPullInfo{.id = model_id + ":" + url_obj.pathParams[3], .downloaded_models = {}, .available_models = {}, @@ -787,8 +791,10 @@ cpp::result ModelService::GetModelPullInfo( if (parsed.size() != 2) { return cpp::fail("Invalid model handle: " + input); } - return ModelPullInfo{ - .id = input, .downloaded_models = {}, .available_models = {}, .download_url = input}; + return ModelPullInfo{.id = input, + .downloaded_models = {}, + .available_models 
= {}, + .download_url = input}; } if (input.find("/") != std::string::npos) { diff --git a/engine/services/model_service.h b/engine/services/model_service.h index 495685982..c1600e2a6 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -39,7 +39,7 @@ class ModelService { std::shared_ptr download_service, std::shared_ptr inference_service) : download_service_{download_service}, - inference_svc_(inference_service) {}; + inference_svc_(inference_service){}; /** * Return model id if download successfully @@ -81,7 +81,8 @@ class ModelService { cpp::result HandleUrl(const std::string& url); cpp::result HandleDownloadUrlAsync( - const std::string& url, std::optional temp_model_id); + const std::string& url, std::optional temp_model_id, + std::optional temp_name); private: /** From f37ad6b437c879f7d0bf02d01b2c03b632536ea8 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:39:58 +0700 Subject: [PATCH 3/7] chore: beta version add discord notification (#1605) Co-authored-by: Hien To --- .github/workflows/beta-build.yml | 60 ++++++++++++++++++++---------- .github/workflows/stable-build.yml | 25 +------------ 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index 4dd0d8716..1c9e49790 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -35,6 +35,7 @@ jobs: name: "${{ env.VERSION }}" draft: true prerelease: false + generate_release_notes: true build-macos: uses: ./.github/workflows/template-build-macos.yml @@ -79,26 +80,47 @@ jobs: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }} - update_release_draft: - needs: [build-macos, build-windows-x64, build-linux-x64] + update_release: + needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64] + permissions: + # write permission is required to create a github release + contents: write + # write permission is required for autolabeler + # otherwise, read permission is required at least + pull-requests: write + runs-on: ubuntu-latest + steps: + - name: set release to prerelease + run: | + gh release edit v${{ needs.get-update-version.outputs.new_version }} --draft=false --prerelease + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + noti-discord: + needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release] + runs-on: ubuntu-latest permissions: - # write permission is required to create a github release contents: write - # write permission is required for autolabeler - # otherwise, read permission is required at least - pull-requests: write - runs-on: ubuntu-latest steps: - # (Optional) GitHub Enterprise requires GHE_HOST variable set - #- name: Set GHE_HOST - # run: | - # echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV + - name: Set version to environment variable + run: | + echo "VERSION=${{ needs.get-update-version.outputs.new_version }}" >> $GITHUB_ENV + echo "RUNNER_ID=$GITHUB_RUN_ID" >> $GITHUB_ENV - # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 - # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml - # with: - # config-name: my-config.yml - # disable-autolabeler: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + - name: Notify Discord + uses: appleboy/discord-action@v1.0.0 + with: + webhook_id: ${{ secrets.WEBHOOK_ID_BETA }} + webhook_token: ${{ secrets.WEBHOOK_TOKEN_BETA }} + message: | + Cortex.cpp beta build artifact version ${{ env.VERSION }}: + - Windows: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - macOS Universal: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Linux Deb: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }} \ No newline at end of file diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml index 8caea8a3e..2ffece895 100644 --- a/.github/workflows/stable-build.yml +++ b/.github/workflows/stable-build.yml @@ -35,6 +35,7 @@ jobs: name: "${{ env.VERSION }}" draft: true prerelease: false + generate_release_notes: true build-macos: uses: ./.github/workflows/template-build-macos.yml @@ -78,27 +79,3 @@ jobs: channel: stable upload_url: ${{ needs.create-draft-release.outputs.upload_url }} cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }} - - update_release_draft: - needs: [build-macos, build-windows-x64, build-linux-x64] - permissions: - # write permission is required to create a github release - contents: write - # write permission is required for autolabeler - # otherwise, read permission is required at least - pull-requests: write - runs-on: ubuntu-latest - steps: - # (Optional) GitHub Enterprise requires GHE_HOST variable set - #- name: Set GHE_HOST - # run: | - # echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV - - # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 - # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml - # with: - # config-name: my-config.yml - # disable-autolabeler: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From 11cd87be262e7591d24c302fa13d4972ae63b719 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 13:33:24 +0700 Subject: [PATCH 4/7] Feat e2e test cortexso hub (#1590) * feat: e2e testing cortexso model hub * chore: schedule to run models test weekly * chore: resolve warning pytest * chore: use default branch cortexso hub --------- Co-authored-by: Hien To --- .github/workflows/test-cortexso-model-hub.yml | 109 ++++++++++++ engine/e2e-test/pytest.ini | 2 + engine/e2e-test/requirements.txt | 1 + .../test_api_cortexso_hub_llamacpp_engine.py | 166 ++++++++++++++++++ 4 files changed, 278 insertions(+) create mode 100644 .github/workflows/test-cortexso-model-hub.yml create mode 100644 engine/e2e-test/pytest.ini create mode 100644 engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py diff --git a/.github/workflows/test-cortexso-model-hub.yml b/.github/workflows/test-cortexso-model-hub.yml new file mode 100644 index 000000000..320369235 --- /dev/null +++ b/.github/workflows/test-cortexso-model-hub.yml @@ -0,0 +1,109 @@ +name: Test cortexso Model Hub + +on: + schedule: + - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7 + workflow_dispatch: + +jobs: + build-and-test: + runs-on: ${{ matrix.runs-on }} + timeout-minutes: 1440 + strategy: + fail-fast: false + matrix: + include: + - os: "linux" + name: "amd64" + runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub" + cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" + build-deps-cmake-flags: "" + ccache-dir: "" + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: use python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install tools on Linux + run: | + sudo chown -R runner:runner /home/runner/cortexcpp + python3 -m pip install awscli + + - name: Download vcpkg cache from s3 + continue-on-error: true + run: | + aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0 + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Configure vcpkg + run: | + cd engine + make configure-vcpkg + + - name: Build + run: | + cd engine + make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" + + - name: Run unit tests + run: | + cd engine + make run-unit-tests + + - name: Run setup config for linux + shell: bash + run: | + cd engine + ./build/cortex --version + sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc + + - name: Run e2e tests + run: | + cd engine + cp build/cortex build/cortex-nightly + cp build/cortex build/cortex-beta + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py + rm build/cortex-nightly + rm build/cortex-beta + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }} + + - name: Pre-package + run: | + cd engine + make pre-package 
DESTINATION_BINARY_NAME="cortex" + + - name: Package + run: | + cd engine + make package + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cortex-${{ matrix.os }}-${{ matrix.name }} + path: ./engine/cortex + + + - name: Upload linux vcpkg cache to s3 + continue-on-error: true + if: always() + run: | + aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }} + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" diff --git a/engine/e2e-test/pytest.ini b/engine/e2e-test/pytest.ini new file mode 100644 index 000000000..0102b0a97 --- /dev/null +++ b/engine/e2e-test/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_default_fixture_loop_scope = function diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt index f0eabb974..05b47e0b0 100644 --- a/engine/e2e-test/requirements.txt +++ b/engine/e2e-test/requirements.txt @@ -2,3 +2,4 @@ websockets pytest pytest-asyncio requests +pyyaml \ No newline at end of file diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py new file mode 100644 index 000000000..e13c4827a --- /dev/null +++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py @@ -0,0 +1,166 @@ +import pytest +import requests +import os +import yaml + +from pathlib import Path +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) + +collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db" +token = os.getenv("HF_TOKEN") +if not token: + raise ValueError("HF_TOKEN environment variable not set") + +def get_repos_in_collection(collection_id, token): + # API endpoint to get list of repos in the collection + url = f"https://huggingface.co/api/collections/{collection_id}" + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, headers=headers) + + # Check response and retrieve repo IDs if successful + if response.status_code == 200: + return [repo['id'] for repo in response.json()["items"]] + else: + print("Error fetching repos:", response.status_code, response.json()) + return [] + +def get_repo_default_branch(repo_id, token): + # Direct link to metadata.yaml on the main branch + url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml" + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, headers=headers) + + # Check response and retrieve the 'default' field value + if response.status_code == 200: + # Read YAML content from response text + metadata = yaml.safe_load(response.text) + return metadata.get("default") + else: + print(f"Error fetching metadata for {repo_id}:", response.status_code, response.json()) + return None + +def get_all_repos_and_default_branches_from_metadata(collection_id, token): + # Get list of repos from the collection + repos = get_repos_in_collection(collection_id, token) + combined_list = [] + + # Iterate over each repo and fetch the default branch from metadata + for repo_id in repos: + default_branch = get_repo_default_branch(repo_id, token) + if default_branch and "gguf" in default_branch: + combined_list.append(f"{repo_id.split('/')[1]}:{default_branch}") + + return combined_list + +#Call the function and print the results +repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token) 
+ +class TestCortexsoModels: + + @pytest.fixture(autouse=True) + def setup_and_teardown(self, request): + # Setup + success = start_server() + if not success: + raise Exception("Failed to start server") + # Delete model if exists + for model_url in repo_branches: + run( + "Delete model", + [ + "models", + "delete", + model_url, + ], + ) + yield + + # Teardown + for model_url in repo_branches: + run( + "Delete model", + [ + "models", + "delete", + model_url, + ], + ) + stop_server() + + @pytest.mark.parametrize("model_url", repo_branches) + @pytest.mark.asyncio + async def test_models_on_cortexso_hub(self, model_url): + + # Pull model from cortexso hub + json_body = { + "model": model_url + } + response = requests.post("http://localhost:3928/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: {model_url}" + + await wait_for_websocket_download_success_event(timeout=None) + + # Check if the model was pulled successfully + get_model_response = requests.get( + f"http://127.0.0.1:3928/models/{model_url}" + ) + assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}" + assert ( + get_model_response.json()["model"] == model_url + ), f"Unexpected model name for: {model_url}" + + # Check if the model is available in the list of models + response = requests.get("http://localhost:3928/models") + assert response.status_code == 200 + models = [i["id"] for i in response.json()["data"]] + assert model_url in models, f"Model not found in list: {model_url}" + + # Install Engine + exit_code, output, error = run( + "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture = False + ) + root = Path.home() + assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt") + assert exit_code == 0, f"Install engine failed with error: {error}" + + # Start the model + response = requests.post("http://localhost:3928/models/start", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + + # Send an inference request + inference_json_body = { + "frequency_penalty": 0.2, + "max_tokens": 4096, + "messages": [ + { + "content": "", + "role": "user" + } + ], + "model": model_url, + "presence_penalty": 0.6, + "stop": [ + "End" + ], + "stream": False, + "temperature": 0.8, + "top_p": 0.95 + } + response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"}) + assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}" + + # Stop the model + response = requests.post("http://localhost:3928/models/stop", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + + # Uninstall Engine + exit_code, output, error = run( + "Uninstall engine", ["engines", "uninstall", "llama-cpp"] + ) + assert "Engine llama-cpp uninstalled successfully!" 
in output + assert exit_code == 0, f"Install engine failed with error: {error}" From 322a0cf788506dbfc989e2e7402d1a0d9368822c Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:55:37 +0700 Subject: [PATCH 5/7] chore: notification discord add binary download url (#1607) Co-authored-by: Hien To --- .github/workflows/beta-build.yml | 3 +++ .github/workflows/template-noti-discord.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index 1c9e49790..b5bee44b6 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -117,10 +117,13 @@ jobs: - Windows: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64.tar.gz - macOS Universal: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal.tar.gz - Linux Deb: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64.tar.gz - Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }} \ No newline at end of file diff --git a/.github/workflows/template-noti-discord.yaml b/.github/workflows/template-noti-discord.yaml index 673bd3da7..3752a2201 100644 --- a/.github/workflows/template-noti-discord.yaml +++ b/.github/workflows/template-noti-discord.yaml @@ -33,10 +33,13 @@ jobs: - Windows: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - Binary: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-nightly.tar.gz - macOS Universal: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Binary: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-nightly.tar.gz - Linux Deb: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Binary: 
https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-nightly.tar.gz
             - Github action run: https://github.com/janhq/cortex.cpp/actions/runs/${{ env.RUNNER_ID }}

From 43fbc529abc9c3926702fd2766804b0bc7a5c04b Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Fri, 1 Nov 2024 16:13:06 +0700
Subject: [PATCH 6/7] chore: auto set pre-release for beta version (#1608)

Co-authored-by: Hien To
---
 .github/workflows/beta-build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml
index b5bee44b6..0a4d3c735 100644
--- a/.github/workflows/beta-build.yml
+++ b/.github/workflows/beta-build.yml
@@ -90,6 +90,8 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-latest
     steps:
+      - name: Getting the repo
+        uses: actions/checkout@v4
       - name: set release to prerelease
         run: |
           gh release edit v${{ needs.get-update-version.outputs.new_version }} --draft=false --prerelease

From 152b76fe8828fc8644119ba15aafa25eac044f0f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 1 Nov 2024 17:58:12 +0700
Subject: [PATCH 7/7] fix: progress bar on CMD (#1609)

---
 engine/cli/utils/download_progress.cc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index 9c38d4bdf..b47b4fc9a 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -36,6 +36,20 @@
 
 bool DownloadProgress::Handle(const DownloadType& event_type) {
   assert(!!ws_);
+#if defined(_WIN32)
+  HANDLE h_out = GetStdHandle(STD_OUTPUT_HANDLE);
+  DWORD dw_original_out_mode = 0;
+  if (h_out != INVALID_HANDLE_VALUE) {
+    GetConsoleMode(h_out, &dw_original_out_mode);
+
+    // Enable ANSI escape code processing
+    DWORD dw_requested_out_mode =
+        dw_original_out_mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+    if (!SetConsoleMode(h_out, dw_requested_out_mode)) {
+      SetConsoleMode(h_out, dw_original_out_mode);
+    }
+  }
+#endif
   std::unordered_map<std::string, uint64_t> totals;
   status_ = DownloadStatus::DownloadStarted;
   std::unique_ptr<indicators::DynamicProgress<indicators::ProgressBar>> bars;
@@ -124,6 +138,11 @@
     ws_->dispatch(handle_message);
   }
   indicators::show_console_cursor(true);
+#if defined(_WIN32)
+  if (dw_original_out_mode != 0 && h_out != INVALID_HANDLE_VALUE) {
+    SetConsoleMode(h_out, dw_original_out_mode);
+  }
+#endif
   if (status_ == DownloadStatus::DownloadError)
     return false;
   return true;