Skip to content

Commit

Permalink
feat: model sources (#1777)
Browse files Browse the repository at this point in the history
* feat: prioritize GPUs

* fix: migrate db

* fix: add priority

* fix: db

* fix: more

* feat: model sources

* feat: support delete API

* feat: cli: support models sources add

* feat: cli: model source delete

* feat: cli: add model source list

* feat: sync cortex.db

* chore: cleanup

* feat: add metadata for model

* fix: migration

* chore: unit tests: cleanup

* fix: add metadata

* fix: pull model

* chore: unit tests: update

* chore: add e2e tests for models sources

* chore: add API docs

* chore: rename

---------

Co-authored-by: vansangpfiev <[email protected]>
  • Loading branch information
vansangpfiev and sangjanai authored Dec 12, 2024
1 parent 8dde05c commit f473b0b
Show file tree
Hide file tree
Showing 23 changed files with 1,269 additions and 271 deletions.
99 changes: 99 additions & 0 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,105 @@
"tags": ["Pulling Models"]
}
},
"/v1/models/sources": {
"post": {
"summary": "Add a model source",
"description": "User can add a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to add",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
                        "description": "Model source added successfully",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
"example": "Added model source"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
},
"delete": {
"summary": "Remove a model source",
"description": "User can remove a Huggingface Organization or Repository",
"requestBody": {
"required": false,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The url of model source to remove",
"example": "https://huggingface.co/cortexso/tinyllama"
}
}
}
}
}
},
"responses": {
"200": {
                        "description": "Model source removed successfully",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string",
                                    "description": "Confirmation message returned after the model source is removed",
"example": "Removed model source successfully!"
}
}
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"error": {
"type": "string",
"description": "Error message describing the issue with the request"
}
}
}
}
}
}
},
"tags": ["Pulling Models"]
}
},
"/v1/threads": {
"post": {
"operationId": "ThreadsController_create",
Expand Down
76 changes: 75 additions & 1 deletion engine/cli/command_line_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#include "commands/model_import_cmd.h"
#include "commands/model_list_cmd.h"
#include "commands/model_pull_cmd.h"
#include "commands/model_source_add_cmd.h"
#include "commands/model_source_del_cmd.h"
#include "commands/model_source_list_cmd.h"
#include "commands/model_start_cmd.h"
#include "commands/model_stop_cmd.h"
#include "commands/model_upd_cmd.h"
Expand Down Expand Up @@ -253,6 +256,8 @@ void CommandLineParser::SetupModelCommands() {
"Display cpu mode");
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
"Display gpu mode");
list_models_cmd->add_flag("--available", cml_data_.display_available_model,
"Display available models to download");
list_models_cmd->group(kSubcommands);
list_models_cmd->callback([this]() {
if (std::exchange(executed_, true))
Expand All @@ -261,7 +266,8 @@ void CommandLineParser::SetupModelCommands() {
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
cml_data_.display_engine, cml_data_.display_version,
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode,
cml_data_.display_available_model);
});

auto get_models_cmd =
Expand Down Expand Up @@ -329,6 +335,74 @@ void CommandLineParser::SetupModelCommands() {
std::stoi(cml_data_.config.apiServerPort),
cml_data_.model_id, cml_data_.model_path);
});

auto model_source_cmd = models_cmd->add_subcommand(
"sources", "Subcommands for managing model sources");
model_source_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources [options] [subcommand]");
model_source_cmd->group(kSubcommands);

model_source_cmd->callback([this, model_source_cmd] {
if (std::exchange(executed_, true))
return;
if (model_source_cmd->get_subcommands().empty()) {
CLI_LOG(model_source_cmd->help());
}
});

auto model_src_add_cmd =
model_source_cmd->add_subcommand("add", "Add a model source");
model_src_add_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources add [model_source]");
model_src_add_cmd->group(kSubcommands);
model_src_add_cmd->add_option("source", cml_data_.model_src, "");
model_src_add_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_add_cmd->help());
return;
};

commands::ModelSourceAddCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_del_cmd =
model_source_cmd->add_subcommand("remove", "Remove a model source");
model_src_del_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources remove [model_source]");
model_src_del_cmd->group(kSubcommands);
model_src_del_cmd->add_option("source", cml_data_.model_src, "");
model_src_del_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;
if (cml_data_.model_src.empty()) {
CLI_LOG("[model_source] is required\n");
CLI_LOG(model_src_del_cmd->help());
return;
};

commands::ModelSourceDelCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
});

auto model_src_list_cmd =
model_source_cmd->add_subcommand("list", "List all model sources");
model_src_list_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
" models sources list");
model_src_list_cmd->group(kSubcommands);
model_src_list_cmd->callback([&]() {
if (std::exchange(executed_, true))
return;

commands::ModelSourceListCmd().Exec(
cml_data_.config.apiServerHost,
std::stoi(cml_data_.config.apiServerPort));
});
}

void CommandLineParser::SetupConfigsCommands() {
Expand Down
2 changes: 2 additions & 0 deletions engine/cli/command_line_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class CommandLineParser {
bool display_version = false;
bool display_cpu_mode = false;
bool display_gpu_mode = false;
bool display_available_model = false;
std::string filter = "";
std::string log_level = "INFO";

Expand All @@ -74,6 +75,7 @@ class CommandLineParser {
int port;
config_yaml_utils::CortexConfig config;
std::unordered_map<std::string, std::string> model_update_options;
std::string model_src;
};
CmlData cml_data_;
std::unordered_map<std::string, std::string> config_update_opts_;
Expand Down
78 changes: 50 additions & 28 deletions engine/cli/commands/model_list_cmd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ using Row_t =
void ModelListCmd::Exec(const std::string& host, int port,
const std::string& filter, bool display_engine,
bool display_version, bool display_cpu_mode,
bool display_gpu_mode) {
bool display_gpu_mode, bool available) {
// Start server if server is not started yet
if (!commands::IsServerAlive(host, port)) {
CLI_LOG("Starting server ...");
Expand Down Expand Up @@ -73,40 +73,62 @@ void ModelListCmd::Exec(const std::string& host, int port,
continue;
}

count += 1;
if (available) {
if (v["status"].asString() != "downloadable") {
continue;
}

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
count += 1;

if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}
table.add_row({row.begin(), row.end()});
} else {
if (v["status"].asString() == "downloadable") {
continue;
}

count += 1;

std::vector<std::string> row = {std::to_string(count),
v["model"].asString()};
if (display_engine) {
row.push_back(v["engine"].asString());
}
if (display_version) {
row.push_back(v["version"].asString());
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
if (auto& r = v["recommendation"]; !r.isNull()) {
if (display_cpu_mode) {
if (!r["cpu_mode"].isNull()) {
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
}
}

if (display_gpu_mode) {
if (!r["gpu_mode"].isNull()) {
std::string s;
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
" - ";
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
s += "recommended ngl: " +
r["gpu_mode"][0]["recommend_ngl"].asString();
row.push_back(s);
}
}
}
}

table.add_row({row.begin(), row.end()});
table.add_row({row.begin(), row.end()});
}
}
}

Expand Down
3 changes: 2 additions & 1 deletion engine/cli/commands/model_list_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class ModelListCmd {
public:
void Exec(const std::string& host, int port, const std::string& filter,
bool display_engine = false, bool display_version = false,
bool display_cpu_mode = false, bool display_gpu_mode = false);
bool display_cpu_mode = false, bool display_gpu_mode = false,
bool available = false);
};
} // namespace commands
38 changes: 38 additions & 0 deletions engine/cli/commands/model_source_add_cmd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#include "model_source_add_cmd.h"
#include "server_start_cmd.h"
#include "utils/json_helper.h"
#include "utils/logging_utils.h"

namespace commands {

// Registers a new model source (e.g. a Hugging Face organization or
// repository URL) by POSTing it to the server's /v1/models/sources endpoint.
//
// @param host          API server host.
// @param port          API server port.
// @param model_source  URL of the model source to add.
// @return true on success; false if the server could not be started or the
//         request failed.
bool ModelSourceAddCmd::Exec(const std::string& host, int port,
                             const std::string& model_source) {
  // The command talks to the HTTP API, so make sure the server is up first.
  if (!commands::IsServerAlive(host, port)) {
    CLI_LOG("Starting server ...");
    commands::ServerStartCmd ssc;
    if (!ssc.Exec(host, port)) {
      return false;
    }
  }

  auto url = url_parser::Url{
      .protocol = "http",
      .host = host + ":" + std::to_string(port),
      .pathParams = {"v1", "models", "sources"},
  };

  Json::Value json_data;
  json_data["source"] = model_source;

  auto data_str = json_data.toStyledString();
  auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
  if (res.has_error()) {
    // NOTE(review): assumes the error payload carries a "message" field; the
    // OpenAPI spec documents an "error" field for 400 responses — confirm
    // against the server's actual error body.
    auto root = json_helper::ParseJsonString(res.error());
    CLI_LOG(root["message"].asString());
    return false;
  }

  CLI_LOG("Added model source: " << model_source);
  return true;
}

}  // namespace commands
12 changes: 12 additions & 0 deletions engine/cli/commands/model_source_add_cmd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <string>

namespace commands {

// CLI command that registers a new model source (a Hugging Face organization
// or repository URL) with the cortex API server.
class ModelSourceAddCmd {
 public:
  // Sends the add-source request to the server at host:port.
  // Returns true on success, false otherwise.
  bool Exec(const std::string& host, int port,
            const std::string& model_source);
};
}  // namespace commands
Loading

0 comments on commit f473b0b

Please sign in to comment.