Skip to content

Commit

Permalink
feat: remote engine (#1666)
Browse files Browse the repository at this point in the history
* Init remote engine

* Fix: CI build windows

* Fix: CI build windows

* Fix: CI build windows

* Fix: CI build windows

* feat: new db schema for model and template for engine

* Add remote model

* Add Get, List, Update support for remote models

* change model_id to model in remote engine

* fix: mac compatibility

* chore: some refactors before making big changes

* feat: db ops for engines

* chore: small refactor before more changes

* Update engine

* refine db schema, composite key for engines

* add entry definition for engine at db layer

* complete add, get engine operations

* engine management

* Integrate with remote engine to run remote model

* error handling and response transform

* Support for stream request

* chore: fix conflicts

* feat: anthropic

* feat: support anthropic

* feat: support anthropic

* chore: rename

* chore: cleanup and fix unit tests

* fix: issue with db

* chore: refactor remote engine

* fix: e2e tests

* fix: e2e tests

* chore: API docs

* fix: use different interface for remote engine

---------

Co-authored-by: Luke Nguyen <[email protected]>
Co-authored-by: vansangpfiev <[email protected]>
Co-authored-by: vansangpfiev <[email protected]>
  • Loading branch information
4 people authored Dec 5, 2024
1 parent 2b74824 commit 79f7679
Show file tree
Hide file tree
Showing 38 changed files with 2,934 additions and 125 deletions.
232 changes: 229 additions & 3 deletions docs/static/openapi/cortex.json
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,73 @@
}
}
},
"/v1/models/add": {
"post": {
"operationId": "ModelsController_addModel",
"summary": "Add a remote model",
"description": "Add a new remote model configuration to the system.",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/AddModelRequest"
}
}
}
},
"responses": {
"200": {
"description": "Successful response",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"message": {
"type": "string"
},
"model": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"engine": {
"type": "string"
},
"version": {
"type": "string"
}
}
}
}
},
"example": {
"message": "Model added successfully!",
"model": {
"model": "claude-3-5-sonnet-20241022",
"engine": "anthropic",
"version": "2023-06-01"
}
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SimpleErrorResponse"
}
}
}
}
},
"tags": ["Pulling Models"]
}
},
"/v1/models": {
"get": {
"operationId": "ModelsController_findAll",
Expand Down Expand Up @@ -1417,7 +1484,7 @@
"required": true,
"schema": {
"type": "string",
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
"default": "llama-cpp"
},
"description": "The type of engine"
Expand All @@ -1439,6 +1506,31 @@
"type": "string",
"description": "The variant of the engine to install (optional)",
"example": "mac-arm64"
},
"type": {
"type": "string",
"description": "The type of connection: either remote or local",
"example": "remote"
},
"url": {
"type": "string",
"description": "The URL for the API endpoint for remote engine",
"example": "https://api.openai.com"
},
"api_key": {
"type": "string",
"description": "The API key for authentication for remote engine",
"example": ""
},
"metadata": {
"type": "object",
"properties": {
"get_models_url": {
"type": "string",
"description": "The URL to get models",
"example": "https://api.openai.com/v1/models"
}
}
}
}
}
Expand Down Expand Up @@ -1475,7 +1567,7 @@
"required": true,
"schema": {
"type": "string",
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
"default": "llama-cpp"
},
"description": "The type of engine"
Expand Down Expand Up @@ -1690,7 +1782,7 @@
"required": true,
"schema": {
"type": "string",
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm", "openai", "anthropic"],
"default": "llama-cpp"
},
"description": "The name of the engine to update"
Expand Down Expand Up @@ -3636,6 +3728,109 @@
}
}
},
"AddModelRequest": {
"type": "object",
"required": ["model", "engine", "version", "inference_params", "TransformReq", "TransformResp", "metadata"],
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model."
},
"api_key_template": {
"type": "string",
"description": "Template for the API key header."
},
"engine": {
"type": "string",
"description": "The engine used for the model."
},
"version": {
"type": "string",
"description": "The version of the model."
},
"inference_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"frequency_penalty": {
"type": "number"
},
"presence_penalty": {
"type": "number"
},
"max_tokens": {
"type": "integer"
},
"stream": {
"type": "boolean"
}
}
},
"TransformReq": {
"type": "object",
"properties": {
"get_models": {
"type": "object"
},
"chat_completions": {
"type": "object",
"properties": {
"url": {
"type": "string"
},
"template": {
"type": "string"
}
}
},
"embeddings": {
"type": "object"
}
}
},
"TransformResp": {
"type": "object",
"properties": {
"chat_completions": {
"type": "object",
"properties": {
"template": {
"type": "string"
}
}
},
"embeddings": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"properties": {
"author": {
"type": "string"
},
"description": {
"type": "string"
},
"end_point": {
"type": "string"
},
"logo": {
"type": "string"
},
"api_key_url": {
"type": "string"
}
}
}
}
},
"CreateModelDto": {
"type": "object",
"properties": {
Expand Down Expand Up @@ -4305,6 +4500,37 @@
"type": "integer",
"description": "Number of GPU layers.",
"example": 33
},
"api_key_template": {
"type": "string",
"description": "Template for the API key header."
},
"version": {
"type": "string",
"description": "The version of the model."
},
"inference_params": {
"type": "object",
"properties": {
"temperature": {
"type": "number"
},
"top_p": {
"type": "number"
},
"frequency_penalty": {
"type": "number"
},
"presence_penalty": {
"type": "number"
},
"max_tokens": {
"type": "integer"
},
"stream": {
"type": "boolean"
}
}
}
}
},
Expand Down
8 changes: 6 additions & 2 deletions engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/remote_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/openai_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/anthropic_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/extensions/remote-engine/template_renderer.cc
)

target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
Expand Down Expand Up @@ -171,17 +175,17 @@ endif()
aux_source_directory(controllers CTL_SRC)
aux_source_directory(repositories REPO_SRC)
aux_source_directory(services SERVICES_SRC)
aux_source_directory(common COMMON_SRC)
aux_source_directory(models MODEL_SRC)
aux_source_directory(cortex-common CORTEX_COMMON)
aux_source_directory(config CONFIG_SRC)
aux_source_directory(database DB_SRC)
aux_source_directory(extensions EX_SRC)
aux_source_directory(migrations MIGR_SRC)
aux_source_directory(utils UTILS_SRC)

target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} )

target_sources(${TARGET_NAME} PRIVATE ${UTILS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${MIGR_SRC} ${REPO_SRC})
target_sources(${TARGET_NAME} PRIVATE ${UTILS_SRC} ${CONFIG_SRC} ${CTL_SRC} ${COMMON_SRC} ${SERVICES_SRC} ${DB_SRC} ${EX_SRC} ${MIGR_SRC} ${REPO_SRC})

set_target_properties(${TARGET_NAME} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}
Expand Down
7 changes: 6 additions & 1 deletion engine/cli/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ add_executable(${TARGET_NAME} main.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/model_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/inference_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../services/hardware_service.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/remote_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/openai_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/anthropic_engine.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extensions/remote-engine/template_renderer.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/easywsclient.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/download_progress.cc
${CMAKE_CURRENT_SOURCE_DIR}/../utils/config_yaml_utils.cc
Expand Down Expand Up @@ -121,11 +125,12 @@ aux_source_directory(../cortex-common CORTEX_COMMON)
aux_source_directory(../config CONFIG_SRC)
aux_source_directory(commands COMMANDS_SRC)
aux_source_directory(../database DB_SRC)
aux_source_directory(../extensions EX_SRC)

target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. )
target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})

target_sources(${TARGET_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${COMMON_SRC} ${DB_SRC})
target_sources(${TARGET_NAME} PRIVATE ${COMMANDS_SRC} ${CONFIG_SRC} ${COMMON_SRC} ${DB_SRC} ${EX_SRC})

set_target_properties(${TARGET_NAME} PROPERTIES
RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}
Expand Down
6 changes: 5 additions & 1 deletion engine/common/engine_servicei.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
#include <json/value.h>
#include <string>
#include <vector>
#include "database/engines.h"
#include "utils/result.hpp"

// TODO(namh): think of a better name for this struct
struct DefaultEngineVariant {
std::string engine;
Expand Down Expand Up @@ -54,4 +54,8 @@ class EngineServiceI {

virtual cpp::result<void, std::string> UnloadEngine(
const std::string& engine_name) = 0;
virtual cpp::result<cortex::db::EngineEntry, std::string>
GetEngineByNameAndVariant(
const std::string& engine_name,
const std::optional<std::string> variant = std::nullopt) = 0;
};
Loading

0 comments on commit 79f7679

Please sign in to comment.