diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json index 4792fe306..a05f8b24e 100644 --- a/docs/static/openapi/cortex.json +++ b/docs/static/openapi/cortex.json @@ -2522,26 +2522,31 @@ "default": "llama-cpp" }, "description": "The type of engine" - }, - { - "name": "version", - "in": "query", - "required": true, - "schema": { - "type": "string" - }, - "description": "The version of the engine variant" - }, - { - "name": "variant", - "in": "query", - "required": true, - "schema": { - "type": "string" - }, - "description": "The variant of the engine" } ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "required": ["version", "variant"], + "properties": { + "version": { + "type": "string", + "description": "The version of the engine variant", + "example": "0.1.34" + }, + "variant": { + "type": "string", + "description": "The variant of the engine", + "example": "mac-arm64" + } + } + } + } + } + }, "responses": { "200": { "description": "Successful response", @@ -3087,7 +3092,7 @@ "items": { "type": "string" }, - "example": ["http://localhost:39281", "https://cortex.so"] + "example": ["http://127.0.0.1:39281", "https://cortex.so"] }, "cors": { "type": "boolean", @@ -3134,7 +3139,7 @@ }, "example": { "allowed_origins": [ - "http://localhost:39281", + "http://127.0.0.1:39281", "https://cortex.so" ], "cors": false, @@ -3175,7 +3180,7 @@ "type": "string" }, "description": "List of allowed origins.", - "example": ["http://localhost:39281", "https://cortex.so"] + "example": ["http://127.0.0.1:39281", "https://cortex.so"] }, "proxy_username": { "type": "string", @@ -3244,7 +3249,7 @@ "type": "string" }, "example": [ - "http://localhost:39281", + "http://127.0.0.1:39281", "https://cortex.so" ] }, @@ -6277,7 +6282,7 @@ }, "required": ["available", "total", "type"] }, - "Storage": { + "StorageDto": { "type": "object", "properties": { "available": { diff --git a/engine/CMakeLists.txt b/engine/CMakeLists.txt index d876f67a0..bc613dcb3 100644 --- a/engine/CMakeLists.txt +++ b/engine/CMakeLists.txt @@ -146,8 +146,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/dylib_path_manager.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc ) diff --git a/engine/cli/CMakeLists.txt b/engine/cli/CMakeLists.txt index 904d43cc1..8f146484e 100644 --- a/engine/cli/CMakeLists.txt +++ b/engine/cli/CMakeLists.txt @@ -84,8 +84,6 @@ add_executable(${TARGET_NAME} main.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc ${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc ${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc diff --git a/engine/common/engine_servicei.h b/engine/common/engine_servicei.h index 85fa87d76..a4b0c8732 100644 --- a/engine/common/engine_servicei.h +++ b/engine/common/engine_servicei.h @@ -58,4 +58,6 @@ class EngineServiceI { GetEngineByNameAndVariant( const std::string& engine_name, const std::optional variant = std::nullopt) = 0; + + virtual bool IsRemoteEngine(const std::string& engine_name) = 0; }; diff --git a/engine/common/message.h b/engine/common/message.h index 3bff6f048..d31c4f0d3 100644 --- a/engine/common/message.h +++ b/engine/common/message.h @@ -137,6 +137,27 @@ struct Message : JsonSerializable { if (root["content"].isArray() && !root["content"].empty()) { if (root["content"][0]["type"].asString() == "text") { message.content = ParseContents(std::move(root["content"])).value(); + } else if (root["content"][0]["type"].asString() == "image") { + // deprecated, for supporting jan and should be removed in the future + auto text_str = root["content"][0]["text"]["value"].asString(); + auto img_url = + root["content"][0]["text"]["annotations"][0].asString(); + auto text_content = std::make_unique(); + { + auto text = OpenAi::Text(); + auto empty_annotations = + std::vector>(); + text.value = std::move(text_str); + text.annotations = std::move(empty_annotations); + text_content->text = std::move(text); + } + + auto image_url_obj = OpenAi::ImageUrl(img_url, "auto"); + auto image_url_content = std::make_unique( + "image_url", std::move(image_url_obj)); + + message.content.push_back(std::move(text_content)); + message.content.push_back(std::move(image_url_content)); } else { // deprecated, for supporting jan and should be removed in the future // check if annotations is empty diff --git a/engine/common/message_content_image_url.h b/engine/common/message_content_image_url.h index b86544e38..336cf01d3 100644 --- a/engine/common/message_content_image_url.h +++ b/engine/common/message_content_image_url.h @@ -4,14 +4,21 @@ namespace OpenAi { -struct ImageUrl { - // The external URL of the image, must be a supported image types: jpeg, jpg, png, gif, webp. +struct ImageUrl : public JsonSerializable { + /** + * The external URL of the image, must be a supported image types: + * jpeg, jpg, png, gif, webp. + */ std::string url; - // Specifies the detail level of the image. low uses fewer tokens, you can opt in to high resolution using high. Default value is auto + /** + * Specifies the detail level of the image. low uses fewer tokens, you + * can opt in to high resolution using high. Default value is auto + */ std::string detail; - ImageUrl() = default; + ImageUrl(const std::string& url, const std::string& detail = "auto") + : url{url}, detail{detail} {} ImageUrl(ImageUrl&&) noexcept = default; @@ -20,13 +27,25 @@ struct ImageUrl { ImageUrl(const ImageUrl&) = delete; ImageUrl& operator=(const ImageUrl&) = delete; + + cpp::result ToJson() override { + try { + Json::Value root; + root["url"] = url; + root["detail"] = detail; + return root; + } catch (const std::exception& e) { + return cpp::fail(std::string("ToJson failed: ") + e.what()); + } + } }; // References an image URL in the content of a message. struct ImageUrlContent : Content { // The type of the content part. - ImageUrlContent(const std::string& type) : Content(type) {} + explicit ImageUrlContent(const std::string& type, ImageUrl&& image_url) + : Content(type), image_url{std::move(image_url)} {} ImageUrlContent(ImageUrlContent&&) noexcept = default; @@ -38,6 +57,8 @@ struct ImageUrlContent : Content { ImageUrl image_url; + ~ImageUrlContent() override = default; + static cpp::result FromJson( Json::Value&& json) { if (json.empty()) { @@ -45,11 +66,9 @@ struct ImageUrlContent : Content { } try { - ImageUrlContent content{"image_url"}; - ImageUrl image_url; - image_url.url = std::move(json["image_url"]["url"].asString()); - image_url.detail = std::move(json["image_url"]["detail"].asString()); - content.image_url = std::move(image_url); + auto image_url = ImageUrl(json["image_url"]["url"].asString(), + json["image_url"]["detail"].asString()); + ImageUrlContent content{"image_url", std::move(image_url)}; return content; } catch (const std::exception& e) { return cpp::fail(std::string("FromJson failed: ") + e.what()); @@ -60,8 +79,7 @@ struct ImageUrlContent : Content { try { Json::Value json; json["type"] = type; - json["image_url"]["url"] = image_url.url; - json["image_url"]["detail"] = image_url.detail; + json["image_url"] = image_url.ToJson().value(); return json; } catch (const std::exception& e) { return cpp::fail(std::string("ToJson failed: ") + e.what()); diff --git a/engine/common/message_content_text.h b/engine/common/message_content_text.h index ea6aab1ab..5ede2582d 100644 --- a/engine/common/message_content_text.h +++ b/engine/common/message_content_text.h @@ -122,7 +122,6 @@ struct FilePathWrapper : Annotation { struct Text : JsonSerializable { // The data that makes up the text. - Text() = default; Text(Text&&) noexcept = default; @@ -214,6 +213,8 @@ struct TextContent : Content { Text text; + ~TextContent() override = default; + static cpp::result FromJson(Json::Value&& json) { if (json.empty()) { return cpp::fail("Json string is empty"); diff --git a/engine/config/model_config.h b/engine/config/model_config.h index 84e175d54..a799adb27 100644 --- a/engine/config/model_config.h +++ b/engine/config/model_config.h @@ -8,52 +8,12 @@ #include #include #include +#include "config/remote_template.h" #include "utils/format_utils.h" #include "utils/remote_models_utils.h" namespace config { -namespace { -const std::string kOpenAITransformReqTemplate = - R"({ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} })"; -const std::string kOpenAITransformRespTemplate = - R"({ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == \"id\" or key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"object\" or key == \"usage\" -%} {%- if not first -%},{%- endif -%} \"{{ key }}\": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} })"; -const std::string kAnthropicTransformReqTemplate = - R"({ {% set first = true %} {% for key, value in input_request %} {% if key == \"system\" or key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} })"; -const std::string kAnthropicTransformRespTemplate = R"({ - "id": "{{ input_request.id }}", - "created": null, - "object": "chat.completion", - "model": "{{ input_request.model }}", - "choices": [ - { - "index": 0, - "message": { - "role": "{{ input_request.role }}", - "content": "{% if input_request.content and input_request.content.0.type == "text" %} {{input_request.content.0.text}} {% endif %}", - "refusal": null - }, - "logprobs": null, - "finish_reason": "{{ input_request.stop_reason }}" - } - ], - "usage": { - "prompt_tokens": {{ input_request.usage.input_tokens }}, - "completion_tokens": {{ input_request.usage.output_tokens }}, - "total_tokens": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, - "prompt_tokens_details": { - "cached_tokens": 0 - }, - "completion_tokens_details": { - "reasoning_tokens": 0, - "accepted_prediction_tokens": 0, - "rejected_prediction_tokens": 0 - } - }, - "system_fingerprint": "fp_6b68a8204b" - })"; -} // namespace - struct RemoteModelConfig { std::string model; std::string api_key_template; @@ -108,6 +68,7 @@ struct RemoteModelConfig { kOpenAITransformRespTemplate; } } + metadata = json.get("metadata", metadata); } diff --git a/engine/config/remote_template.h b/engine/config/remote_template.h new file mode 100644 index 000000000..8a17aaa9a --- /dev/null +++ b/engine/config/remote_template.h @@ -0,0 +1,66 @@ +#include + +namespace config { +const std::string kOpenAITransformReqTemplate = + R"({ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} })"; +const std::string kOpenAITransformRespTemplate = + R"({ {%- set first = true -%} {%- for key, value in input_request -%} {%- if key == "id" or key == "choices" or key == "created" or key == "model" or key == "service_tier" or key == "system_fingerprint" or key == "object" or key == "usage" -%} {%- if not first -%},{%- endif -%} "{{ key }}": {{ tojson(value) }} {%- set first = false -%} {%- endif -%} {%- endfor -%} })"; +const std::string kAnthropicTransformReqTemplate = + R"({ + {% for key, value in input_request %} + {% if key == "messages" %} + {% if input_request.messages.0.role == "system" %} + "system": "{{ input_request.messages.0.content }}", + "messages": [ + {% for message in input_request.messages %} + {% if not loop.is_first %} + {"role": "{{ message.role }}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} + {% endif %} + {% endfor %} + ] + {% else %} + "messages": [ + {% for message in input_request.messages %} + {"role": " {{ message.role}}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} + {% endfor %} + ] + {% endif %} + {% else if key == "system" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} + "{{ key }}": {{ tojson(value) }} + {% endif %} + {% if not loop.is_last %},{% endif %} + {% endfor %} })"; +const std::string kAnthropicTransformRespTemplate = R"({ + "id": "{{ input_request.id }}", + "created": null, + "object": "chat.completion", + "model": "{{ input_request.model }}", + "choices": [ + { + "index": 0, + "message": { + "role": "{{ input_request.role }}", + "content": "{% if input_request.content and input_request.content.0.type == "text" %} {{input_request.content.0.text}} {% endif %}", + "refusal": null + }, + "logprobs": null, + "finish_reason": "{{ input_request.stop_reason }}" + } + ], + "usage": { + "prompt_tokens": {{ input_request.usage.input_tokens }}, + "completion_tokens": {{ input_request.usage.output_tokens }}, + "total_tokens": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, + "prompt_tokens_details": { + "cached_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": "fp_6b68a8204b" + })"; + +} // namespace config \ No newline at end of file diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc index affa45d52..59793b2a6 100644 --- a/engine/controllers/models.cc +++ b/engine/controllers/models.cc @@ -200,7 +200,7 @@ void Models::ListModel( .string()); auto model_config = yaml_handler.GetModelConfig(); - if (!remote_engine::IsRemoteEngine(model_config.engine)) { + if (!engine_service_->IsRemoteEngine(model_config.engine)) { Json::Value obj = model_config.ToJson(); obj["id"] = model_entry.model; obj["model"] = model_entry.model; @@ -632,7 +632,7 @@ void Models::GetRemoteModels( const HttpRequestPtr& req, std::function&& callback, const std::string& engine_id) { - if (!remote_engine::IsRemoteEngine(engine_id)) { + if (!engine_service_->IsRemoteEngine(engine_id)) { Json::Value ret; ret["message"] = "Not a remote engine: " + engine_id; auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); @@ -668,8 +668,7 @@ void Models::AddRemoteModel( auto model_handle = (*(req->getJsonObject())).get("model", "").asString(); auto engine_name = (*(req->getJsonObject())).get("engine", "").asString(); - /* To do: uncomment when remote engine is ready - + auto engine_validate = engine_service_->IsEngineReady(engine_name); if (engine_validate.has_error()) { Json::Value ret; @@ -679,6 +678,7 @@ void Models::AddRemoteModel( callback(resp); return; } + if (!engine_validate.value()) { Json::Value ret; ret["message"] = "Engine is not ready! Please install first!"; @@ -687,7 +687,7 @@ void Models::AddRemoteModel( callback(resp); return; } - */ + config::RemoteModelConfig model_config; model_config.LoadFromJson(*(req->getJsonObject())); cortex::db::Models modellist_utils_obj; diff --git a/engine/controllers/swagger.cc b/engine/controllers/swagger.cc index 96a6c3837..abb80b94e 100644 --- a/engine/controllers/swagger.cc +++ b/engine/controllers/swagger.cc @@ -2,30 +2,17 @@ #include "cortex_openapi.h" #include "utils/cortex_utils.h" -constexpr auto ScalarUi = R"( - - - - Cortex API Reference - - - - - - - - - -)"; - -Json::Value SwaggerController::generateOpenAPISpec() { +Json::Value SwaggerController::GenerateOpenApiSpec() const { Json::Value root; Json::Reader reader; reader.parse(CortexOpenApi::GetOpenApiJson(), root); + + Json::Value server_url; + server_url["url"] = "http://" + host_ + ":" + port_; + Json::Value resp_data(Json::arrayValue); + resp_data.append(server_url); + + root["servers"] = resp_data; return root; } @@ -41,7 +28,7 @@ void SwaggerController::serveSwaggerUI( void SwaggerController::serveOpenAPISpec( const drogon::HttpRequestPtr& req, std::function&& callback) const { - Json::Value spec = generateOpenAPISpec(); + auto spec = GenerateOpenApiSpec(); auto resp = cortex_utils::CreateCortexHttpJsonResponse(spec); callback(resp); } diff --git a/engine/controllers/swagger.h b/engine/controllers/swagger.h index 4099bc447..61db1cc6e 100644 --- a/engine/controllers/swagger.h +++ b/engine/controllers/swagger.h @@ -5,13 +5,38 @@ using namespace drogon; -class SwaggerController : public drogon::HttpController { +class SwaggerController + : public drogon::HttpController { + + constexpr static auto ScalarUi = R"( + + + + Cortex API Reference + + + + + + + + + +)"; + public: METHOD_LIST_BEGIN ADD_METHOD_TO(SwaggerController::serveSwaggerUI, "/", Get); ADD_METHOD_TO(SwaggerController::serveOpenAPISpec, "/openapi.json", Get); METHOD_LIST_END + explicit SwaggerController(const std::string& host, const std::string& port) + : host_{host}, port_{port} {}; + void serveSwaggerUI( const drogon::HttpRequestPtr& req, std::function&& callback) const; @@ -21,6 +46,8 @@ class SwaggerController : public drogon::HttpController { std::function&& callback) const; private: - static const std::string swaggerUIHTML; - static Json::Value generateOpenAPISpec(); -}; \ No newline at end of file + std::string host_; + std::string port_; + + Json::Value GenerateOpenApiSpec() const; +}; diff --git a/engine/extensions/remote-engine/anthropic_engine.cc b/engine/extensions/remote-engine/anthropic_engine.cc deleted file mode 100644 index 847cba566..000000000 --- a/engine/extensions/remote-engine/anthropic_engine.cc +++ /dev/null @@ -1,62 +0,0 @@ -#include "anthropic_engine.h" -#include -#include -#include "utils/logging_utils.h" - -namespace remote_engine { -namespace { -constexpr const std::array kAnthropicModels = { - "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", - "claude-3-opus-20240229", "claude-3-sonnet-20240229", - "claude-3-haiku-20240307"}; -} -void AnthropicEngine::GetModels( - std::shared_ptr json_body, - std::function&& callback) { - Json::Value json_resp; - Json::Value model_array(Json::arrayValue); - { - std::shared_lock l(models_mtx_); - for (const auto& [m, _] : models_) { - Json::Value val; - val["id"] = m; - val["engine"] = "anthropic"; - val["start_time"] = "_"; - val["model_size"] = "_"; - val["vram"] = "_"; - val["ram"] = "_"; - val["object"] = "model"; - model_array.append(val); - } - } - - json_resp["object"] = "list"; - json_resp["data"] = model_array; - - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = 200; - callback(std::move(status), std::move(json_resp)); - CTL_INF("Running models responded"); -} - -Json::Value AnthropicEngine::GetRemoteModels() { - Json::Value json_resp; - Json::Value model_array(Json::arrayValue); - for (const auto& m : kAnthropicModels) { - Json::Value val; - val["id"] = std::string(m); - val["engine"] = "anthropic"; - val["created"] = "_"; - val["object"] = "model"; - model_array.append(val); - } - - json_resp["object"] = "list"; - json_resp["data"] = model_array; - CTL_INF("Remote models responded"); - return json_resp; -} -} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/anthropic_engine.h b/engine/extensions/remote-engine/anthropic_engine.h deleted file mode 100644 index bcd3dfaf7..000000000 --- a/engine/extensions/remote-engine/anthropic_engine.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once -#include "remote_engine.h" - -namespace remote_engine { - class AnthropicEngine: public RemoteEngine { -public: - void GetModels( - std::shared_ptr json_body, - std::function&& callback) override; - - Json::Value GetRemoteModels() override; - }; -} \ No newline at end of file diff --git a/engine/extensions/remote-engine/openai_engine.cc b/engine/extensions/remote-engine/openai_engine.cc deleted file mode 100644 index 7c7d70385..000000000 --- a/engine/extensions/remote-engine/openai_engine.cc +++ /dev/null @@ -1,54 +0,0 @@ -#include "openai_engine.h" -#include "utils/logging_utils.h" - -namespace remote_engine { - -void OpenAiEngine::GetModels( - std::shared_ptr json_body, - std::function&& callback) { - Json::Value json_resp; - Json::Value model_array(Json::arrayValue); - { - std::shared_lock l(models_mtx_); - for (const auto& [m, _] : models_) { - Json::Value val; - val["id"] = m; - val["engine"] = "openai"; - val["start_time"] = "_"; - val["model_size"] = "_"; - val["vram"] = "_"; - val["ram"] = "_"; - val["object"] = "model"; - model_array.append(val); - } - } - - json_resp["object"] = "list"; - json_resp["data"] = model_array; - - Json::Value status; - status["is_done"] = true; - status["has_error"] = false; - status["is_stream"] = false; - status["status_code"] = 200; - callback(std::move(status), std::move(json_resp)); - CTL_INF("Running models responded"); -} - -Json::Value OpenAiEngine::GetRemoteModels() { - auto response = MakeGetModelsRequest(); - if (response.error) { - Json::Value error; - error["error"] = response.error_message; - return error; - } - Json::Value response_json; - Json::Reader reader; - if (!reader.parse(response.body, response_json)) { - Json::Value error; - error["error"] = "Failed to parse response"; - return error; - } - return response_json; -} -} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/openai_engine.h b/engine/extensions/remote-engine/openai_engine.h deleted file mode 100644 index 61dc68f0c..000000000 --- a/engine/extensions/remote-engine/openai_engine.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include "remote_engine.h" - -namespace remote_engine { -class OpenAiEngine : public RemoteEngine { - public: - void GetModels( - std::shared_ptr json_body, - std::function&& callback) override; - - Json::Value GetRemoteModels() override; -}; -} // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.cc b/engine/extensions/remote-engine/remote_engine.cc index 04effb457..6361077dd 100644 --- a/engine/extensions/remote-engine/remote_engine.cc +++ b/engine/extensions/remote-engine/remote_engine.cc @@ -16,67 +16,14 @@ bool is_anthropic(const std::string& model) { return model.find("claude") != std::string::npos; } -struct AnthropicChunk { - std::string type; - std::string id; - int index; - std::string msg; - std::string model; - std::string stop_reason; - bool should_ignore = false; - - AnthropicChunk(const std::string& str) { - if (str.size() > 6) { - std::string s = str.substr(6); - try { - auto root = json_helper::ParseJsonString(s); - type = root["type"].asString(); - if (type == "message_start") { - id = root["message"]["id"].asString(); - model = root["message"]["model"].asString(); - } else if (type == "content_block_delta") { - index = root["index"].asInt(); - if (root["delta"]["type"].asString() == "text_delta") { - msg = root["delta"]["text"].asString(); - } - } else if (type == "message_delta") { - stop_reason = root["delta"]["stop_reason"].asString(); - } else { - // ignore other messages - should_ignore = true; - } - } catch (const std::exception& e) { - should_ignore = true; - CTL_WRN("JSON parse error: " << e.what()); - } - } else { - should_ignore = true; - } - } +bool is_openai(const std::string& model) { + return model.find("gpt") != std::string::npos; +} - std::string ToOpenAiFormatString() { - Json::Value root; - root["id"] = id; - root["object"] = "chat.completion.chunk"; - root["created"] = Json::Value(); - root["model"] = model; - root["system_fingerprint"] = "fp_e76890f0c3"; - Json::Value choices(Json::arrayValue); - Json::Value choice; - Json::Value content; - choice["index"] = 0; - content["content"] = msg; - if (type == "message_start") { - content["role"] = "assistant"; - content["refusal"] = Json::Value(); - } - choice["delta"] = content; - choice["finish_reason"] = stop_reason.empty() ? Json::Value() : stop_reason; - choices.append(choice); - root["choices"] = choices; - return "data: " + json_helper::DumpJsonString(root); - } -}; +constexpr const std::array kAnthropicModels = { + "claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022", + "claude-3-opus-20240229", "claude-3-sonnet-20240229", + "claude-3-haiku-20240307"}; } // namespace @@ -92,21 +39,13 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, while ((pos = context->buffer.find('\n')) != std::string::npos) { std::string line = context->buffer.substr(0, pos); context->buffer = context->buffer.substr(pos + 1); - CTL_TRC(line); // Skip empty lines if (line.empty() || line == "\r" || line.find("event:") != std::string::npos) continue; - // Remove "data: " prefix if present - // if (line.substr(0, 6) == "data: ") - // { - // line = line.substr(6); - // } - - // Skip [DONE] message - // std::cout << line << std::endl; + CTL_DBG(line); if (line == "data: [DONE]" || line.find("message_stop") != std::string::npos) { Json::Value status; @@ -120,17 +59,20 @@ size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, // Parse the JSON Json::Value chunk_json; - if (is_anthropic(context->model)) { - AnthropicChunk ac(line); - if (ac.should_ignore) + if (!is_openai(context->model)) { + std::string s = line.substr(6); + try { + auto root = json_helper::ParseJsonString(s); + root["model"] = context->model; + root["id"] = context->id; + root["stream"] = true; + auto result = context->renderer.Render(context->stream_template, root); + CTL_DBG(result); + chunk_json["data"] = "data: " + result + "\n\n"; + } catch (const std::exception& e) { + CTL_WRN("JSON parse error: " << e.what()); continue; - ac.model = context->model; - if (ac.type == "message_start") { - context->id = ac.id; - } else { - ac.id = context->id; } - chunk_json["data"] = ac.ToOpenAiFormatString() + "\n\n"; } else { chunk_json["data"] = line + "\n\n"; } @@ -178,10 +120,16 @@ CurlResponse RemoteEngine::MakeStreamingChatCompletionRequest( headers = curl_slist_append(headers, "Cache-Control: no-cache"); headers = curl_slist_append(headers, "Connection: keep-alive"); + std::string stream_template = chat_res_template_; + StreamContext context{ std::make_shared>( callback), - "", "", config.model}; + "", + "", + config.model, + renderer_, + stream_template}; curl_easy_setopt(curl, CURLOPT_URL, full_url.c_str()); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); @@ -232,7 +180,8 @@ static size_t WriteCallback(char* ptr, size_t size, size_t nmemb, return size * nmemb; } -RemoteEngine::RemoteEngine() { +RemoteEngine::RemoteEngine(const std::string& engine_name) + : engine_name_(engine_name) { curl_global_init(CURL_GLOBAL_ALL); } @@ -395,7 +344,33 @@ bool RemoteEngine::LoadModelConfig(const std::string& model, void RemoteEngine::GetModels( std::shared_ptr json_body, std::function&& callback) { - CTL_WRN("Not implemented yet!"); + Json::Value json_resp; + Json::Value model_array(Json::arrayValue); + { + std::shared_lock l(models_mtx_); + for (const auto& [m, _] : models_) { + Json::Value val; + val["id"] = m; + val["engine"] = "openai"; + val["start_time"] = "_"; + val["model_size"] = "_"; + val["vram"] = "_"; + val["ram"] = "_"; + val["object"] = "model"; + model_array.append(val); + } + } + + json_resp["object"] = "list"; + json_resp["data"] = model_array; + + Json::Value status; + status["is_done"] = true; + status["has_error"] = false; + status["is_stream"] = false; + status["status_code"] = 200; + callback(std::move(status), std::move(json_resp)); + CTL_INF("Running models responded"); } void RemoteEngine::LoadModel( @@ -431,6 +406,21 @@ void RemoteEngine::LoadModel( } if (json_body->isMember("metadata")) { metadata_ = (*json_body)["metadata"]; + if (!metadata_["TransformReq"].isNull() && + !metadata_["TransformReq"]["chat_completions"].isNull() && + !metadata_["TransformReq"]["chat_completions"]["template"].isNull()) { + chat_req_template_ = + metadata_["TransformReq"]["chat_completions"]["template"].asString(); + CTL_INF(chat_req_template_); + } + + if (!metadata_["TransformResp"].isNull() && + !metadata_["TransformResp"]["chat_completions"].isNull() && + !metadata_["TransformResp"]["chat_completions"]["template"].isNull()) { + chat_res_template_ = + metadata_["TransformResp"]["chat_completions"]["template"].asString(); + CTL_INF(chat_res_template_); + } } Json::Value response; @@ -535,23 +525,6 @@ void RemoteEngine::HandleChatCompletion( std::string(e.what())); } - // Parse system for anthropic - if (is_anthropic(model)) { - bool has_system = false; - Json::Value msgs(Json::arrayValue); - for (auto& kv : (*json_body)["messages"]) { - if (kv["role"].asString() == "system") { - (*json_body)["system"] = kv["content"].asString(); - has_system = true; - } else { - msgs.append(kv); - } - } - if (has_system) { - (*json_body)["messages"] = msgs; - } - } - // Render with error handling try { result = renderer_.Render(template_str, *json_body); @@ -601,33 +574,42 @@ void RemoteEngine::HandleChatCompletion( // Transform Response std::string response_str; try { - // Check if required YAML nodes exist - if (!model_config->transform_resp["chat_completions"]) { - throw std::runtime_error( - "Missing 'chat_completions' node in transform_resp"); - } - if (!model_config->transform_resp["chat_completions"]["template"]) { - throw std::runtime_error("Missing 'template' node in chat_completions"); - } + std::string template_str; + if (!chat_res_template_.empty()) { + CTL_DBG( + "Use engine transform response template: " << chat_res_template_); + template_str = chat_res_template_; + } else { + // Check if required YAML nodes exist + if (!model_config->transform_resp["chat_completions"]) { + throw std::runtime_error( + "Missing 'chat_completions' node in transform_resp"); + } + if (!model_config->transform_resp["chat_completions"]["template"]) { + throw std::runtime_error( + "Missing 'template' node in chat_completions"); + } - // Validate JSON body - if (!response_json || response_json.isNull()) { - throw std::runtime_error("Invalid or null JSON body"); - } + // Validate JSON body + if (!response_json || response_json.isNull()) { + throw std::runtime_error("Invalid or null JSON body"); + } - // Get template string with error check - std::string template_str; - try { - template_str = - model_config->transform_resp["chat_completions"]["template"] - .as(); - } catch (const YAML::BadConversion& e) { - throw std::runtime_error("Failed to convert template node to string: " + - std::string(e.what())); + // Get template string with error check + + try { + template_str = + model_config->transform_resp["chat_completions"]["template"] + .as(); + } catch (const YAML::BadConversion& e) { + throw std::runtime_error( + "Failed to convert template node to string: " + + std::string(e.what())); + } } - // Render with error handling try { + response_json["stream"] = false; response_str = renderer_.Render(template_str, response_json); } catch (const std::exception& e) { throw std::runtime_error("Template rendering error: " + @@ -705,8 +687,43 @@ void RemoteEngine::HandleEmbedding( } Json::Value RemoteEngine::GetRemoteModels() { - CTL_WRN("Not implemented yet!"); - return {}; + if (metadata_["get_models_url"].isNull() || + metadata_["get_models_url"].asString().empty()) { + if (engine_name_ == kAnthropicEngine) { + Json::Value json_resp; + Json::Value model_array(Json::arrayValue); + for (const auto& m : kAnthropicModels) { + Json::Value val; + val["id"] = std::string(m); + val["engine"] = "anthropic"; + val["created"] = "_"; + val["object"] = "model"; + model_array.append(val); + } + + json_resp["object"] = "list"; + json_resp["data"] = model_array; + CTL_INF("Remote models responded"); + return json_resp; + } else { + return Json::Value(); + } + } else { + auto response = MakeGetModelsRequest(); + if (response.error) { + Json::Value error; + error["error"] = response.error_message; + return error; + } + Json::Value response_json; + Json::Reader reader; + if (!reader.parse(response.body, response_json)) { + Json::Value error; + error["error"] = "Failed to parse response"; + return error; + } + return response_json; + } } } // namespace remote_engine \ No newline at end of file diff --git a/engine/extensions/remote-engine/remote_engine.h b/engine/extensions/remote-engine/remote_engine.h index 8ce6fa652..d8dfbad61 100644 --- a/engine/extensions/remote-engine/remote_engine.h +++ b/engine/extensions/remote-engine/remote_engine.h @@ -14,9 +14,6 @@ // Helper for CURL response namespace remote_engine { -inline bool IsRemoteEngine(std::string_view e) { - return e == kAnthropicEngine || e == kOpenAiEngine; -} struct StreamContext { std::shared_ptr> callback; @@ -24,6 +21,8 @@ struct StreamContext { // Cache value for Anthropic std::string id; std::string model; + TemplateRenderer& renderer; + std::string stream_template; }; struct CurlResponse { std::string body; @@ -49,8 +48,10 @@ class RemoteEngine : public RemoteEngineI { std::unordered_map models_; TemplateRenderer renderer_; Json::Value metadata_; + std::string chat_req_template_; + std::string chat_res_template_; std::string api_key_template_; - std::unique_ptr async_file_logger_; + std::string engine_name_; // Helper functions CurlResponse MakeChatCompletionRequest(const ModelConfig& config, @@ -67,7 +68,7 @@ class RemoteEngine : public RemoteEngineI { ModelConfig* GetModelConfig(const std::string& model); public: - RemoteEngine(); + explicit RemoteEngine(const std::string& engine_name); virtual ~RemoteEngine(); // Main interface implementations @@ -95,7 +96,7 @@ class RemoteEngine : public RemoteEngineI { void HandleEmbedding( std::shared_ptr json_body, std::function&& callback) override; - + Json::Value GetRemoteModels() override; }; diff --git a/engine/main.cc b/engine/main.cc index 8ca5ffd1f..5cc6c740e 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -11,6 +11,7 @@ #include "controllers/models.h" #include "controllers/process_manager.h" #include "controllers/server.h" +#include "controllers/swagger.h" #include "controllers/threads.h" #include "database/database.h" #include "migrations/migration_manager.h" @@ -50,7 +51,8 @@ #error "Unsupported platform!" #endif -void RunServer(std::optional port, bool ignore_cout) { +void RunServer(std::optional host, std::optional port, + bool ignore_cout) { #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) signal(SIGINT, SIG_IGN); #elif defined(_WIN32) @@ -61,9 +63,16 @@ void RunServer(std::optional port, bool ignore_cout) { reinterpret_cast(console_ctrl_handler), true); #endif auto config = file_manager_utils::GetCortexConfig(); - if (port.has_value() && *port != std::stoi(config.apiServerPort)) { + if (host.has_value() || port.has_value()) { + if (host.has_value() && *host != config.apiServerHost) { + config.apiServerHost = *host; + } + + if (port.has_value() && *port != std::stoi(config.apiServerPort)) { + config.apiServerPort = std::to_string(*port); + } + auto config_path = file_manager_utils::GetConfigurationPath(); - config.apiServerPort = std::to_string(*port); auto result = config_yaml_utils::CortexConfigMgr::GetInstance().DumpYamlConfig( config, config_path.string()); @@ -71,6 +80,7 @@ void RunServer(std::optional port, bool ignore_cout) { CTL_ERR("Error update " << config_path.string() << result.error()); } } + if (!ignore_cout) { std::cout << "Host: " << config.apiServerHost << " Port: " << config.apiServerPort << "\n"; @@ -155,6 +165,8 @@ void RunServer(std::optional port, bool ignore_cout) { file_watcher_srv->start(); // initialize custom controllers + auto swagger_ctl = std::make_shared(config.apiServerHost, + config.apiServerPort); auto file_ctl = std::make_shared(file_srv, message_srv); auto assistant_ctl = std::make_shared(assistant_srv); auto thread_ctl = std::make_shared(thread_srv, message_srv); @@ -169,6 +181,7 @@ void RunServer(std::optional port, bool ignore_cout) { std::make_shared(inference_svc, engine_service); auto config_ctl = std::make_shared(config_service); + drogon::app().registerController(swagger_ctl); drogon::app().registerController(file_ctl); drogon::app().registerController(assistant_ctl); drogon::app().registerController(thread_ctl); @@ -279,6 +292,7 @@ int main(int argc, char* argv[]) { // avoid printing logs to terminal is_server = true; + std::optional server_host; std::optional server_port; bool ignore_cout_log = false; #if defined(_WIN32) @@ -292,6 +306,8 @@ int main(int argc, char* argv[]) { std::wstring v = argv[i + 1]; file_manager_utils::cortex_data_folder_path = cortex::wc::WstringToUtf8(v); + } else if (command == L"--host") { + server_host = cortex::wc::WstringToUtf8(argv[i + 1]); } else if (command == L"--port") { server_port = std::stoi(argv[i + 1]); } else if (command == L"--ignore_cout") { @@ -308,6 +324,8 @@ int main(int argc, char* argv[]) { file_manager_utils::cortex_config_file_path = argv[i + 1]; } else if (strcmp(argv[i], "--data_folder_path") == 0) { file_manager_utils::cortex_data_folder_path = argv[i + 1]; + } else if (strcmp(argv[i], "--host") == 0) { + server_host = argv[i + 1]; } else if (strcmp(argv[i], "--port") == 0) { server_port = std::stoi(argv[i + 1]); } else if (strcmp(argv[i], "--ignore_cout") == 0) { @@ -363,6 +381,6 @@ int main(int argc, char* argv[]) { } } - RunServer(server_port, ignore_cout_log); + RunServer(server_host, server_port, ignore_cout_log); return 0; } diff --git a/engine/repositories/file_fs_repository.cc b/engine/repositories/file_fs_repository.cc index b9ab4fec6..a209d33c3 100644 --- a/engine/repositories/file_fs_repository.cc +++ b/engine/repositories/file_fs_repository.cc @@ -18,9 +18,28 @@ cpp::result FileFsRepository::StoreFile( } cortex::db::File db; - auto file_full_path = file_container_path / file_metadata.filename; - if (std::filesystem::exists(file_full_path)) { - return cpp::fail("File already exists: " + file_full_path.string()); + auto original_filename = file_metadata.filename; + auto file_full_path = file_container_path / original_filename; + + // Handle duplicate filenames + int counter = 1; + while (std::filesystem::exists(file_full_path)) { + auto dot_pos = original_filename.find_last_of('.'); + std::string name_part; + std::string ext_part; + + if (dot_pos != std::string::npos) { + name_part = original_filename.substr(0, dot_pos); + ext_part = original_filename.substr(dot_pos); + } else { + name_part = original_filename; + ext_part = ""; + } + + auto new_filename = name_part + "_" + std::to_string(counter) + ext_part; + file_full_path = file_container_path / new_filename; + file_metadata.filename = new_filename; + counter++; } try { diff --git a/engine/repositories/message_fs_repository.cc b/engine/repositories/message_fs_repository.cc index 422242e3a..db6f5dd6e 100644 --- a/engine/repositories/message_fs_repository.cc +++ b/engine/repositories/message_fs_repository.cc @@ -80,34 +80,23 @@ MessageFsRepository::ListMessages(const std::string& thread_id, uint8_t limit, messages.end()); } - const bool is_descending = (order == "desc"); - std::sort( - messages.begin(), messages.end(), - [is_descending](const OpenAi::Message& a, const OpenAi::Message& b) { - return is_descending ? (a.id > b.id) : (a.id < b.id); - }); - auto start_it = messages.begin(); auto end_it = messages.end(); if (!after.empty()) { - start_it = std::lower_bound( - messages.begin(), messages.end(), after, - [is_descending](const OpenAi::Message& msg, const std::string& value) { - return is_descending ? (msg.id > value) : (msg.id < value); - }); - - if (start_it != messages.end() && start_it->id == after) { - ++start_it; - } + start_it = std::find_if( + messages.begin(), messages.end(), + [&after](const OpenAi::Message& msg) { return msg.id > after; }); } if (!before.empty()) { - end_it = std::upper_bound( - start_it, messages.end(), before, - [is_descending](const std::string& value, const OpenAi::Message& msg) { - return is_descending ? (value > msg.id) : (value < msg.id); - }); + end_it = std::find_if( + start_it, messages.end(), + [&before](const OpenAi::Message& msg) { return msg.id >= before; }); + } + + if (order == "desc") { + std::reverse(start_it, end_it); } const size_t available_messages = std::distance(start_it, end_it); diff --git a/engine/services/engine_service.cc b/engine/services/engine_service.cc index 035ef4a4e..bdd080f50 100644 --- a/engine/services/engine_service.cc +++ b/engine/services/engine_service.cc @@ -6,8 +6,7 @@ #include #include "algorithm" #include "database/engines.h" -#include "extensions/remote-engine/anthropic_engine.h" -#include "extensions/remote-engine/openai_engine.h" +#include "extensions/remote-engine/remote_engine.h" #include "utils/archive_utils.h" #include "utils/engine_constants.h" #include "utils/engine_matcher_utils.h" @@ -187,7 +186,7 @@ cpp::result EngineService::UninstallEngineVariant( // TODO: handle uninstall remote engine // only delete a remote engine if no model are using it auto exist_engine = GetEngineByNameAndVariant(engine); - if (exist_engine.has_value() && exist_engine.value().type == "remote") { + if (exist_engine.has_value() && exist_engine.value().type == kRemote) { auto result = DeleteEngine(exist_engine.value().id); if (!result.empty()) { // This mean no error when delete model CTL_ERR("Failed to delete engine: " << result); @@ -333,15 +332,9 @@ cpp::result EngineService::DownloadEngine( } else { CTL_INF("Set default engine variant: " << res.value().variant); } - auto create_res = - EngineService::UpsertEngine(engine, // engine_name - "local", // todo - luke - "", // todo - luke - "", // todo - luke - normalize_version, variant.value(), - "Default", // todo - luke - "" // todo - luke - ); + auto create_res = EngineService::UpsertEngine( + engine, // engine_name + kLocal, "", "", normalize_version, variant.value(), "Default", ""); if (create_res.has_value()) { CTL_ERR("Failed to create engine entry: " << create_res->engine_name); @@ -683,17 +676,13 @@ cpp::result EngineService::LoadEngine( } // Check for remote engine - if (remote_engine::IsRemoteEngine(engine_name)) { + if (IsRemoteEngine(engine_name)) { auto exist_engine = GetEngineByNameAndVariant(engine_name); if (exist_engine.has_error()) { return cpp::fail("Remote engine '" + engine_name + "' is not installed"); } - if (engine_name == kOpenAiEngine) { - engines_[engine_name].engine = new remote_engine::OpenAiEngine(); - } else { - engines_[engine_name].engine = new remote_engine::AnthropicEngine(); - } + engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); CTL_INF("Loaded engine: " << engine_name); return {}; @@ -715,21 +704,23 @@ cpp::result EngineService::LoadEngine( #if defined(_WIN32) || defined(_WIN64) // register deps - std::vector paths{}; - paths.push_back(std::move(cuda_path)); - paths.push_back(std::move(engine_dir_path)); - - CTL_DBG("Registering dylib for " - << ne << " with " << std::to_string(paths.size()) << " paths."); - for (const auto& path : paths) { - CTL_DBG("Registering path: " << path.string()); - } + if (!(getenv("ENGINE_PATH"))) { + std::vector paths{}; + paths.push_back(std::move(cuda_path)); + paths.push_back(std::move(engine_dir_path)); - auto reg_result = dylib_path_manager_->RegisterPath(ne, paths); - if (reg_result.has_error()) { - CTL_DBG("Failed register lib paths for: " << ne); - } else { - CTL_DBG("Registered lib paths for: " << ne); + CTL_DBG("Registering dylib for " + << ne << " with " << std::to_string(paths.size()) << " paths."); + for (const auto& path : paths) { + CTL_DBG("Registering path: " << path.string()); + } + + auto reg_result = dylib_path_manager_->RegisterPath(ne, paths); + if (reg_result.has_error()) { + CTL_DBG("Failed register lib paths for: " << ne); + } else { + CTL_DBG("Registered lib paths for: " << ne); + } } #endif @@ -899,7 +890,7 @@ cpp::result EngineService::IsEngineReady( auto ne = NormalizeEngine(engine); // Check for remote engine - if (remote_engine::IsRemoteEngine(engine)) { + if (IsRemoteEngine(engine)) { auto exist_engine = GetEngineByNameAndVariant(engine); if (exist_engine.has_error()) { return cpp::fail("Remote engine '" + engine + "' is not installed"); @@ -1075,11 +1066,7 @@ cpp::result EngineService::GetRemoteModels( if (exist_engine.has_error()) { return cpp::fail("Remote engine '" + engine_name + "' is not installed"); } - if (engine_name == kOpenAiEngine) { - engines_[engine_name].engine = new remote_engine::OpenAiEngine(); - } else { - engines_[engine_name].engine = new remote_engine::AnthropicEngine(); - } + engines_[engine_name].engine = new remote_engine::RemoteEngine(engine_name); CTL_INF("Loaded engine: " << engine_name); } @@ -1092,6 +1079,16 @@ cpp::result EngineService::GetRemoteModels( } } +bool EngineService::IsRemoteEngine(const std::string& engine_name) { + auto ne = Repo2Engine(engine_name); + auto local_engines = file_manager_utils::GetCortexConfig().supportedEngines; + for (auto const& le : local_engines) { + if (le == ne) + return false; + } + return true; +} + cpp::result, std::string> EngineService::GetSupportedEngineNames() { return file_manager_utils::GetCortexConfig().supportedEngines; diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 9253eccf1..527123cb5 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -153,6 +153,8 @@ class EngineService : public EngineServiceI { void RegisterEngineLibPath(); + bool IsRemoteEngine(const std::string& engine_name) override; + private: bool IsEngineLoaded(const std::string& engine); diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index 6a45733d3..ce83152c4 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -773,7 +773,7 @@ cpp::result ModelService::StartModel( auto mc = yaml_handler.GetModelConfig(); // Running remote model - if (remote_engine::IsRemoteEngine(mc.engine)) { + if (engine_svc_->IsRemoteEngine(mc.engine)) { config::RemoteModelConfig remote_mc; remote_mc.LoadFromYamlFile( diff --git a/engine/test/components/CMakeLists.txt b/engine/test/components/CMakeLists.txt index 58c5d83d6..0df46cfc2 100644 --- a/engine/test/components/CMakeLists.txt +++ b/engine/test/components/CMakeLists.txt @@ -16,6 +16,7 @@ add_executable(${PROJECT_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/file_manager_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/curl_utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../utils/system_info_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../../extensions/remote-engine/template_renderer.cc ) find_package(Drogon CONFIG REQUIRED) diff --git a/engine/test/components/main.cc b/engine/test/components/main.cc index 08080680e..ba24a3e01 100644 --- a/engine/test/components/main.cc +++ b/engine/test/components/main.cc @@ -4,11 +4,15 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); +#if defined(NDEBUG) ::testing::GTEST_FLAG(filter) = "-FileManagerConfigTest.*"; int ret = RUN_ALL_TESTS(); if (ret != 0) return ret; ::testing::GTEST_FLAG(filter) = "FileManagerConfigTest.*"; ret = RUN_ALL_TESTS(); +#else + int ret = RUN_ALL_TESTS(); +#endif return ret; } diff --git a/engine/test/components/test_models_db.cc b/engine/test/components/test_models_db.cc index 06294aa8c..0cc9b0344 100644 --- a/engine/test/components/test_models_db.cc +++ b/engine/test/components/test_models_db.cc @@ -1,6 +1,5 @@ #include "database/models.h" #include "gtest/gtest.h" -#include "utils/file_manager_utils.h" namespace cortex::db { namespace { @@ -122,4 +121,4 @@ TEST_F(ModelsTestSuite, TestHasModel) { EXPECT_TRUE(model_list_.DeleteModelEntry(kTestModel.model).value()); } -} // namespace cortex::db \ No newline at end of file +} // namespace cortex::db diff --git a/engine/test/components/test_remote_engine.cc b/engine/test/components/test_remote_engine.cc new file mode 100644 index 000000000..bfac76f49 --- /dev/null +++ b/engine/test/components/test_remote_engine.cc @@ -0,0 +1,81 @@ +#include "extensions/remote-engine/template_renderer.h" +#include "gtest/gtest.h" +#include "utils/json_helper.h" + +class RemoteEngineTest : public ::testing::Test {}; + +TEST_F(RemoteEngineTest, OpenAiToAnthropicRequest) { + std::string tpl = + R"({ + {% for key, value in input_request %} + {% if key == "messages" %} + {% if input_request.messages.0.role == "system" %} + "system": "{{ input_request.messages.0.content }}", + "messages": [ + {% for message in input_request.messages %} + {% if not loop.is_first %} + {"role": "{{ message.role }}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} + {% endif %} + {% endfor %} + ] + {% else %} + "messages": [ + {% for message in input_request.messages %} + {"role": " {{ message.role}}", "content": "{{ message.content }}" } {% if not loop.is_last %},{% endif %} + {% endfor %} + ] + {% endif %} + {% else if key == "system" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} + "{{ key }}": {{ tojson(value) }} + {% endif %} + {% if not loop.is_last %},{% endif %} + {% endfor %} })"; + { + std::string message_with_system = R"({ + "messages": [ + {"role": "system", "content": "You are a seasoned data scientist at a Fortune 500 company."}, + {"role": "user", "content": "Hello, world"} + ], + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, +})"; + + auto data = json_helper::ParseJsonString(message_with_system); + + remote_engine::TemplateRenderer rdr; + auto res = rdr.Render(tpl, data); + + auto res_json = json_helper::ParseJsonString(res); + EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); + EXPECT_EQ(data["max_tokens"].asInt(), res_json["max_tokens"].asInt()); + for (auto const& msg : data["messages"]) { + if (msg["role"].asString() == "system") { + EXPECT_EQ(msg["content"].asString(), res_json["system"].asString()); + } else if (msg["role"].asString() == "user") { + EXPECT_EQ(msg["content"].asString(), + res_json["messages"][0]["content"].asString()); + } + } + } + + { + std::string message_without_system = R"({ + "messages": [ + {"role": "user", "content": "Hello, world"} + ], + "model": "claude-3-5-sonnet-20241022", + "max_tokens": 1024, +})"; + + auto data = json_helper::ParseJsonString(message_without_system); + + remote_engine::TemplateRenderer rdr; + auto res = rdr.Render(tpl, data); + + auto res_json = json_helper::ParseJsonString(res); + EXPECT_EQ(data["model"].asString(), res_json["model"].asString()); + EXPECT_EQ(data["max_tokens"].asInt(), res_json["max_tokens"].asInt()); + EXPECT_EQ(data["messages"][0]["content"].asString(), + res_json["messages"][0]["content"].asString()); + } +} \ No newline at end of file diff --git a/engine/utils/engine_constants.h b/engine/utils/engine_constants.h index 020109fd8..dcdf6a443 100644 --- a/engine/utils/engine_constants.h +++ b/engine/utils/engine_constants.h @@ -6,6 +6,9 @@ constexpr const auto kTrtLlmEngine = "tensorrt-llm"; constexpr const auto kOpenAiEngine = "openai"; constexpr const auto kAnthropicEngine = "anthropic"; +constexpr const auto kRemote = "remote"; +constexpr const auto kLocal = "local"; + constexpr const auto kOnnxRepo = "cortex.onnx"; constexpr const auto kLlamaRepo = "cortex.llamacpp"; constexpr const auto kTrtLlmRepo = "cortex.tensorrt-llm"; diff --git a/engine/utils/hardware/cpu_info.h b/engine/utils/hardware/cpu_info.h index 4c2cb3027..4395cc8dd 100644 --- a/engine/utils/hardware/cpu_info.h +++ b/engine/utils/hardware/cpu_info.h @@ -10,7 +10,10 @@ namespace cortex::hw { inline CPU GetCPUInfo() { - auto cpu = hwinfo::getAllCPUs()[0]; + auto res = hwinfo::getAllCPUs(); + if (res.empty()) + return CPU{}; + auto cpu = res[0]; cortex::cpuid::CpuInfo inst; return CPU{.cores = cpu.numPhysicalCores(), .arch = std::string(GetArch()),