From 166cdb5c6da4cf684d5ffe78defbb576330e8b2d Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 1 Nov 2024 09:12:05 +0700
Subject: [PATCH 1/7] fix: use download event type to listen ws on client side
 (#1601)

* fix: use download event type to listen ws on client side

* fix: format

* fix: remove unused

---------

Co-authored-by: vansangpfiev
---
 engine/cli/commands/engine_install_cmd.cc |  7 +++---
 engine/cli/commands/model_pull_cmd.cc     |  2 +-
 engine/cli/utils/download_progress.cc     | 29 ++++++++++++++++-------
 engine/cli/utils/download_progress.h      |  2 +-
 4 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/engine/cli/commands/engine_install_cmd.cc b/engine/cli/commands/engine_install_cmd.cc
index a0d008c60..f046d89e1 100644
--- a/engine/cli/commands/engine_install_cmd.cc
+++ b/engine/cli/commands/engine_install_cmd.cc
@@ -35,8 +35,9 @@ bool EngineInstallCmd::Exec(const std::string& engine,
   DownloadProgress dp;
   dp.Connect(host_, port_);
   // engine can be small, so need to start ws first
-  auto dp_res = std::async(std::launch::deferred,
-                           [&dp, &engine] { return dp.Handle(engine); });
+  auto dp_res = std::async(std::launch::deferred, [&dp] {
+    return dp.Handle(DownloadType::Engine);
+  });
 
   CLI_LOG("Validating download items, please wait..")
   httplib::Client cli(host_ + ":" + std::to_string(port_));
@@ -68,7 +69,7 @@ bool EngineInstallCmd::Exec(const std::string& engine,
 
   bool check_cuda_download = !system_info_utils::GetCudaVersion().empty();
   if (check_cuda_download) {
-    if (!dp.Handle("cuda"))
+    if (!dp.Handle(DownloadType::CudaToolkit))
       return false;
   }
 
diff --git a/engine/cli/commands/model_pull_cmd.cc b/engine/cli/commands/model_pull_cmd.cc
index 8d6757d61..ad8938146 100644
--- a/engine/cli/commands/model_pull_cmd.cc
+++ b/engine/cli/commands/model_pull_cmd.cc
@@ -149,7 +149,7 @@ std::optional<std::string> ModelPullCmd::Exec(const std::string& host, int port,
       reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
 #endif
   dp.Connect(host, port);
-  if (!dp.Handle(model_id))
+  if (!dp.Handle(DownloadType::Model))
     return std::nullopt;
   if (force_stop)
     return std::nullopt;
diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index e77e43beb..9c38d4bdf 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -4,10 +4,23 @@
 #include "common/event.h"
 #include "indicators/dynamic_progress.hpp"
 #include "indicators/progress_bar.hpp"
+#include "utils/engine_constants.h"
 #include "utils/format_utils.h"
 #include "utils/json_helper.h"
 #include "utils/logging_utils.h"
 
+namespace {
+std::string Repo2Engine(const std::string& r) {
+  if (r == kLlamaRepo) {
+    return kLlamaEngine;
+  } else if (r == kOnnxRepo) {
+    return kOnnxEngine;
+  } else if (r == kTrtLlmRepo) {
+    return kTrtLlmEngine;
+  }
+  return r;
+};
+}  // namespace
 bool DownloadProgress::Connect(const std::string& host, int port) {
   if (ws_) {
     CTL_INF("Already connected!");
@@ -21,7 +34,7 @@ bool DownloadProgress::Connect(const std::string& host, int port) {
   return true;
 }
 
-bool DownloadProgress::Handle(const std::string& id) {
+bool DownloadProgress::Handle(const DownloadType& event_type) {
   assert(!!ws_);
   std::unordered_map<std::string, uint64_t> totals;
   status_ = DownloadStatus::DownloadStarted;
@@ -30,7 +43,7 @@ bool DownloadProgress::Handle(const std::string& id) {
   std::vector<std::unique_ptr<indicators::ProgressBar>> items;
   indicators::show_console_cursor(false);
   auto handle_message = [this, &bars, &items, &totals,
-                         id](const std::string& message) {
+                         event_type](const std::string& message) {
     CTL_INF(message);
 
     auto pad_string = [](const std::string& str,
@@ -50,8 +63,8 @@ bool DownloadProgress::Handle(const std::string& id) {
     auto ev = cortex::event::GetDownloadEventFromJson(
         json_helper::ParseJsonString(message));
 
-    // Ignore other task ids
-    if (ev.download_task_.id != id) {
+    // Ignore other task type
+    if (ev.download_task_.type != event_type) {
       return;
     }
 
@@ -63,7 +76,7 @@ bool DownloadProgress::Handle(const std::string& id) {
             indicators::option::BarWidth{50}, indicators::option::Start{"["},
             indicators::option::Fill{"="}, indicators::option::Lead{">"},
             indicators::option::End{"]"},
-            indicators::option::PrefixText{pad_string(i.id)},
+            indicators::option::PrefixText{pad_string(Repo2Engine(i.id))},
            indicators::option::ForegroundColor{indicators::Color::white},
            indicators::option::ShowRemainingTime{true}));
         bars->push_back(*(items.back()));
@@ -80,7 +93,7 @@ bool DownloadProgress::Handle(const std::string& id) {
       if (ev.type_ == DownloadStatus::DownloadStarted ||
           ev.type_ == DownloadStatus::DownloadUpdated) {
         (*bars)[i].set_option(indicators::option::PrefixText{
-            pad_string(it.id) +
+            pad_string(Repo2Engine(it.id)) +
             std::to_string(
                 int(static_cast<double>(downloaded) / totals[it.id] * 100)) +
             '%'});
@@ -94,8 +107,8 @@ bool DownloadProgress::Handle(const std::string& id) {
         auto total_str = format_utils::BytesToHumanReadable(totals[it.id]);
         (*bars)[i].set_option(
             indicators::option::PostfixText{total_str + "/" + total_str});
-        (*bars)[i].set_option(
-            indicators::option::PrefixText{pad_string(it.id) + "100%"});
+        (*bars)[i].set_option(indicators::option::PrefixText{
+            pad_string(Repo2Engine(it.id)) + "100%"});
         (*bars)[i].set_progress(100);
 
         CTL_INF("Download success");
diff --git a/engine/cli/utils/download_progress.h b/engine/cli/utils/download_progress.h
index 4f71e6d84..98fe85654 100644
--- a/engine/cli/utils/download_progress.h
+++ b/engine/cli/utils/download_progress.h
@@ -10,7 +10,7 @@ class DownloadProgress {
  public:
   bool Connect(const std::string& host, int port);
 
-  bool Handle(const std::string& id);
+  bool Handle(const DownloadType& event_type);
 
   void ForceStop() { force_stop_ = true; }
 

From f5fbad6d5ca9035f44c9a37f230daa5bf8b4d7c0 Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 1 Nov 2024 11:26:18 +0700
Subject: [PATCH 2/7] chore: add model name as a parameter support during
 import via API (#1600)

---
 docs/static/openapi/cortex.json           | 86 +++++++++++++++++++
 engine/controllers/models.cc              | 13 ++-
 engine/e2e-test/test_api_model_import.py  | 24 +++++-
 .../test_api_model_pull_direct_url.py     | 29 +++++--
 engine/services/model_service.cc          | 22 +++--
 engine/services/model_service.h           |  5 +-
 6 files changed, 160 insertions(+), 19 deletions(-)

diff --git a/docs/static/openapi/cortex.json b/docs/static/openapi/cortex.json
index 0f715456d..8b3acb0e2 100644
--- a/docs/static/openapi/cortex.json
+++ b/docs/static/openapi/cortex.json
@@ -554,6 +554,46 @@
         "tags": ["Models"]
       }
     },
+    "/v1/models/import": {
+      "post": {
+        "operationId": "ModelsController_importModel",
+        "summary": "Import model",
+        "description": "Imports a model from a specified path.",
+        "requestBody": {
+          "required": true,
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ImportModelRequest"
+              },
+              "example": {
+                "model": "model-id",
+                "modelPath": "/path/to/gguf",
+                "name": "model display name"
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "Model is imported successfully!",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ImportModelResponse"
+                },
+                "example": {
+                  "message": "Model is imported successfully!",
+                  "modelHandle": "model-id",
+                  "result": "OK"
+                }
+              }
+            }
+          }
+        },
+        "tags": ["Models"]
+      }
+    },
     "/v1/threads": {
       "post": {
         "operationId": "ThreadsController_create",
@@ -1660,6 +1700,15 @@
             "value": "my-custom-model-id"
           }
         ]
+      },
+      "name": {
+        "type": "string",
+        "description": "The name which will be used to overwrite the model name.",
+        "examples": [
+          {
+            "value": "my-custom-model-name"
+          }
+        ]
       }
     }
   },
@@ -1803,6 +1852,43 @@
           }
         }
       },
+      "ImportModelRequest": {
+        "type": "object",
+        "properties": {
+          "model": {
+            "type": "string",
+            "description": "The unique identifier of the model."
+          },
+          "modelPath": {
+            "type": "string",
+            "description": "The file path to the model."
+          },
+          "name": {
+            "type": "string",
+            "description": "The display name of the model."
+          }
+        },
+        "required": ["model", "modelPath"]
+      },
+      "ImportModelResponse": {
+        "type": "object",
+        "properties": {
+          "message": {
+            "type": "string",
+            "description": "Success message."
+          },
+          "modelHandle": {
+            "type": "string",
+            "description": "The unique identifier of the imported model."
+          },
+          "result": {
+            "type": "string",
+            "description": "Result status.",
+            "example": "OK"
+          }
+        },
+        "required": ["message", "modelHandle", "result"]
+      },
       "CommonResponseDto": {
         "type": "object",
         "properties": {
diff --git a/engine/controllers/models.cc b/engine/controllers/models.cc
index 602c81ab6..939f63f31 100644
--- a/engine/controllers/models.cc
+++ b/engine/controllers/models.cc
@@ -33,12 +33,19 @@ void Models::PullModel(const HttpRequestPtr& req,
     desired_model_id = id;
   }
 
+  std::optional<std::string> desired_model_name = std::nullopt;
+  auto name_value = (*(req->getJsonObject())).get("name", "").asString();
+
+  if (!name_value.empty()) {
+    desired_model_name = name_value;
+  }
+
   auto handle_model_input =
       [&, model_handle]() -> cpp::result {
     CTL_INF("Handle model input, model handle: " + model_handle);
     if (string_utils::StartsWith(model_handle, "https")) {
-      return model_service_->HandleDownloadUrlAsync(model_handle,
-                                                    desired_model_id);
+      return model_service_->HandleDownloadUrlAsync(
+          model_handle, desired_model_id, desired_model_name);
     } else if (model_handle.find(":") != std::string::npos) {
       auto model_and_branch = string_utils::SplitBy(model_handle, ":");
       return model_service_->DownloadModelFromCortexsoAsync(
@@ -312,6 +319,7 @@ void Models::ImportModel(
   }
   auto modelHandle = (*(req->getJsonObject())).get("model", "").asString();
   auto modelPath = (*(req->getJsonObject())).get("modelPath", "").asString();
+  auto modelName = (*(req->getJsonObject())).get("name", "").asString();
   config::GGUFHandler gguf_handler;
   config::YamlHandler yaml_handler;
   cortex::db::Models modellist_utils_obj;
@@ -333,6 +341,7 @@ void Models::ImportModel(
     config::ModelConfig model_config = gguf_handler.GetModelConfig();
     model_config.files.push_back(modelPath);
     model_config.model = modelHandle;
+    model_config.name = modelName.empty() ?
model_config.name : modelName; yaml_handler.UpdateModelConfig(model_config); if (modellist_utils_obj.AddModelEntry(model_entry).value()) { diff --git a/engine/e2e-test/test_api_model_import.py b/engine/e2e-test/test_api_model_import.py index 8dd34ea7a..3f8a82a0d 100644 --- a/engine/e2e-test/test_api_model_import.py +++ b/engine/e2e-test/test_api_model_import.py @@ -18,5 +18,25 @@ def setup_and_teardown(self): def test_model_import_should_be_success(self): body_json = {'model': 'tinyllama:gguf', 'modelPath': '/path/to/local/gguf'} - response = requests.post("http://localhost:3928/models/import", json = body_json) - assert response.status_code == 200 \ No newline at end of file + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 200 + + @pytest.mark.skipif(True, reason="Expensive test. Only test when you have local gguf file.") + def test_model_import_with_name_should_be_success(self): + body_json = {'model': 'tinyllama:gguf', + 'modelPath': '/path/to/local/gguf', + 'name': 'test_model'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 200 + + def test_model_import_with_invalid_path_should_fail(self): + body_json = {'model': 'tinyllama:gguf', + 'modelPath': '/invalid/path/to/gguf'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + assert response.status_code == 400 + + def test_model_import_with_missing_model_should_fail(self): + body_json = {'modelPath': '/path/to/local/gguf'} + response = requests.post("http://localhost:3928/models/import", json=body_json) + print(response) + assert response.status_code == 409 \ No newline at end of file diff --git a/engine/e2e-test/test_api_model_pull_direct_url.py b/engine/e2e-test/test_api_model_pull_direct_url.py index e93ca2ddd..aa15fbfba 100644 --- a/engine/e2e-test/test_api_model_pull_direct_url.py +++ b/engine/e2e-test/test_api_model_pull_direct_url.py @@ -21,7 +21,7 @@ def setup_and_teardown(self): [ "models", "delete", - "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf", + "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf", ], ) yield @@ -32,7 +32,7 @@ def setup_and_teardown(self): [ "models", "delete", - "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf", + "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf", ], ) stop_server() @@ -40,16 +40,35 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_model_pull_with_direct_url_should_be_success(self): myobj = { - "model": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/blob/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf" } response = requests.post("http://localhost:3928/models/pull", json=myobj) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) get_model_response = requests.get( - "http://127.0.0.1:3928/models/TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" ) assert get_model_response.status_code == 200 assert ( get_model_response.json()["model"] - == "TheBloke:TinyLlama-1.1B-Chat-v0.3-GGUF:tinyllama-1.1b-chat-v0.3.Q2_K.gguf" + == 
"afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + ) + + @pytest.mark.asyncio + async def test_model_pull_with_direct_url_should_have_desired_name(self): + myobj = { + "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf", + "name": "smol_llama_100m" + } + response = requests.post("http://localhost:3928/models/pull", json=myobj) + assert response.status_code == 200 + await wait_for_websocket_download_success_event(timeout=None) + get_model_response = requests.get( + "http://127.0.0.1:3928/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf" + ) + assert get_model_response.status_code == 200 + print(get_model_response.json()["name"]) + assert ( + get_model_response.json()["name"] + == "smol_llama_100m" ) diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index b49df3420..4967b1dd9 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -17,7 +17,8 @@ namespace { void ParseGguf(const DownloadItem& ggufDownloadItem, - std::optional author) { + std::optional author, + std::optional name) { namespace fs = std::filesystem; namespace fmu = file_manager_utils; config::GGUFHandler gguf_handler; @@ -32,6 +33,8 @@ void ParseGguf(const DownloadItem& ggufDownloadItem, fmu::ToRelativeCortexDataPath(fs::path(ggufDownloadItem.localPath)); model_config.files = {file_rel_path.string()}; model_config.model = ggufDownloadItem.id; + model_config.name = + name.has_value() ? name.value() : gguf_handler.GetModelConfig().name; yaml_handler.UpdateModelConfig(model_config); auto yaml_path{ggufDownloadItem.localPath}; @@ -223,7 +226,8 @@ std::optional ModelService::GetDownloadedModel( } cpp::result ModelService::HandleDownloadUrlAsync( - const std::string& url, std::optional temp_model_id) { + const std::string& url, std::optional temp_model_id, + std::optional temp_name) { auto url_obj = url_parser::FromUrlString(url); if (url_obj.host == kHuggingFaceHost) { @@ -279,9 +283,9 @@ cpp::result ModelService::HandleDownloadUrlAsync( .localPath = local_path, }}}}; - auto on_finished = [author](const DownloadTask& finishedTask) { + auto on_finished = [author, temp_name](const DownloadTask& finishedTask) { auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author); + ParseGguf(gguf_download_item, author, temp_name); }; downloadTask.id = unique_model_id; @@ -346,7 +350,7 @@ cpp::result ModelService::HandleUrl( auto on_finished = [author](const DownloadTask& finishedTask) { auto gguf_download_item = finishedTask.items[0]; - ParseGguf(gguf_download_item, author); + ParseGguf(gguf_download_item, author, std::nullopt); }; auto result = download_service_->AddDownloadTask(downloadTask, on_finished); @@ -770,7 +774,7 @@ cpp::result ModelService::GetModelPullInfo( auto author{url_obj.pathParams[0]}; auto model_id{url_obj.pathParams[1]}; auto file_name{url_obj.pathParams.back()}; - if (author == "cortexso") { + if (author == "cortexso") { return ModelPullInfo{.id = model_id + ":" + url_obj.pathParams[3], .downloaded_models = {}, .available_models = {}, @@ -787,8 +791,10 @@ cpp::result ModelService::GetModelPullInfo( if (parsed.size() != 2) { return cpp::fail("Invalid model handle: " + input); } - return ModelPullInfo{ - .id = input, .downloaded_models = {}, .available_models = {}, .download_url = input}; + return ModelPullInfo{.id = input, + .downloaded_models = {}, + .available_models 
= {}, + .download_url = input}; } if (input.find("/") != std::string::npos) { diff --git a/engine/services/model_service.h b/engine/services/model_service.h index 495685982..c1600e2a6 100644 --- a/engine/services/model_service.h +++ b/engine/services/model_service.h @@ -39,7 +39,7 @@ class ModelService { std::shared_ptr download_service, std::shared_ptr inference_service) : download_service_{download_service}, - inference_svc_(inference_service) {}; + inference_svc_(inference_service){}; /** * Return model id if download successfully @@ -81,7 +81,8 @@ class ModelService { cpp::result HandleUrl(const std::string& url); cpp::result HandleDownloadUrlAsync( - const std::string& url, std::optional temp_model_id); + const std::string& url, std::optional temp_model_id, + std::optional temp_name); private: /** From f37ad6b437c879f7d0bf02d01b2c03b632536ea8 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:39:58 +0700 Subject: [PATCH 3/7] chore: beta version add discord notification (#1605) Co-authored-by: Hien To --- .github/workflows/beta-build.yml | 60 ++++++++++++++++++++---------- .github/workflows/stable-build.yml | 25 +------------ 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index 4dd0d8716..1c9e49790 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -35,6 +35,7 @@ jobs: name: "${{ env.VERSION }}" draft: true prerelease: false + generate_release_notes: true build-macos: uses: ./.github/workflows/template-build-macos.yml @@ -79,26 +80,47 @@ jobs: upload_url: ${{ needs.create-draft-release.outputs.upload_url }} cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }} - update_release_draft: - needs: [build-macos, build-windows-x64, build-linux-x64] + update_release: + needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64] + permissions: + # write permission is required to create a github release + contents: write + # write permission is required for autolabeler + # otherwise, read permission is required at least + pull-requests: write + runs-on: ubuntu-latest + steps: + - name: set release to prerelease + run: | + gh release edit v${{ needs.get-update-version.outputs.new_version }} --draft=false --prerelease + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + noti-discord: + needs: [get-update-version, create-draft-release, build-macos, build-windows-x64, build-linux-x64, update_release] + runs-on: ubuntu-latest permissions: - # write permission is required to create a github release contents: write - # write permission is required for autolabeler - # otherwise, read permission is required at least - pull-requests: write - runs-on: ubuntu-latest steps: - # (Optional) GitHub Enterprise requires GHE_HOST variable set - #- name: Set GHE_HOST - # run: | - # echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV + - name: Set version to environment variable + run: | + echo "VERSION=${{ needs.get-update-version.outputs.new_version }}" >> $GITHUB_ENV + echo "RUNNER_ID=$GITHUB_RUN_ID" >> $GITHUB_ENV - # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 - # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml - # with: - # config-name: my-config.yml - # disable-autolabeler: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file + - name: Notify Discord + uses: appleboy/discord-action@v1.0.0 + with: + webhook_id: ${{ secrets.WEBHOOK_ID_BETA }} + webhook_token: ${{ secrets.WEBHOOK_TOKEN_BETA }} + message: | + Cortex.cpp beta build artifact version ${{ env.VERSION }}: + - Windows: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - macOS Universal: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Linux Deb: + - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb + - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }} \ No newline at end of file diff --git a/.github/workflows/stable-build.yml b/.github/workflows/stable-build.yml index 8caea8a3e..2ffece895 100644 --- a/.github/workflows/stable-build.yml +++ b/.github/workflows/stable-build.yml @@ -35,6 +35,7 @@ jobs: name: "${{ env.VERSION }}" draft: true prerelease: false + generate_release_notes: true build-macos: uses: ./.github/workflows/template-build-macos.yml @@ -78,27 +79,3 @@ jobs: channel: stable upload_url: ${{ needs.create-draft-release.outputs.upload_url }} cortex-llamacpp-version: ${{ needs.get-cortex-llamacpp-latest-version.outputs.cortex_llamacpp_latest_version }} - - update_release_draft: - needs: [build-macos, build-windows-x64, build-linux-x64] - permissions: - # write permission is required to create a github release - contents: write - # write permission is required for autolabeler - # otherwise, read permission is required at least - pull-requests: write - runs-on: ubuntu-latest - steps: - # (Optional) GitHub Enterprise requires GHE_HOST variable set - #- name: Set GHE_HOST - # run: | - # echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV - - # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 - # (Optional) specify config name to use, relative to .github/. 
Default: release-drafter.yml - # with: - # config-name: my-config.yml - # disable-autolabeler: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file From 11cd87be262e7591d24c302fa13d4972ae63b719 Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 13:33:24 +0700 Subject: [PATCH 4/7] Feat e2e test cortexso hub (#1590) * feat: e2e testing cortexso model hub * chore: schedule to run models test weekly * chore: resolve warning pytest * chore: use default branch cortexso hub --------- Co-authored-by: Hien To --- .github/workflows/test-cortexso-model-hub.yml | 109 ++++++++++++ engine/e2e-test/pytest.ini | 2 + engine/e2e-test/requirements.txt | 1 + .../test_api_cortexso_hub_llamacpp_engine.py | 166 ++++++++++++++++++ 4 files changed, 278 insertions(+) create mode 100644 .github/workflows/test-cortexso-model-hub.yml create mode 100644 engine/e2e-test/pytest.ini create mode 100644 engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py diff --git a/.github/workflows/test-cortexso-model-hub.yml b/.github/workflows/test-cortexso-model-hub.yml new file mode 100644 index 000000000..320369235 --- /dev/null +++ b/.github/workflows/test-cortexso-model-hub.yml @@ -0,0 +1,109 @@ +name: Test cortexso Model Hub + +on: + schedule: + - cron: "0 16 * * 5" # every Friday at 23:00 UTC+7 + workflow_dispatch: + +jobs: + build-and-test: + runs-on: ${{ matrix.runs-on }} + timeout-minutes: 1440 + strategy: + fail-fast: false + matrix: + include: + - os: "linux" + name: "amd64" + runs-on: "ubuntu-20-04-e2e-cortexcpp-model-hub" + cmake-flags: "-DCORTEX_CPP_VERSION=${{github.head_ref}} -DCMAKE_BUILD_TEST=ON -DCMAKE_TOOLCHAIN_FILE=vcpkg/scripts/buildsystems/vcpkg.cmake" + build-deps-cmake-flags: "" + ccache-dir: "" + steps: + - name: Clone + id: checkout + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: use python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install tools on Linux + run: | + sudo chown -R runner:runner /home/runner/cortexcpp + python3 -m pip install awscli + + - name: Download vcpkg cache from s3 + continue-on-error: true + run: | + aws s3 sync s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux /home/runner/.cache/vcpkg --endpoint ${{ secrets.MINIO_ENDPOINT }} --cli-read-timeout 0 + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" + + - name: Configure vcpkg + run: | + cd engine + make configure-vcpkg + + - name: Build + run: | + cd engine + make build CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}" BUILD_DEPS_CMAKE_EXTRA_FLAGS="${{ matrix.build-deps-cmake-flags }}" + + - name: Run unit tests + run: | + cd engine + make run-unit-tests + + - name: Run setup config for linux + shell: bash + run: | + cd engine + ./build/cortex --version + sed -i 's/huggingFaceToken: ""/huggingFaceToken: "${{ secrets.HUGGINGFACE_TOKEN_READ }}"/' ~/.cortexrc + + - name: Run e2e tests + run: | + cd engine + cp build/cortex build/cortex-nightly + cp build/cortex build/cortex-beta + python -m pip install --upgrade pip + python -m pip install -r e2e-test/requirements.txt + pytest e2e-test/test_api_cortexso_hub_llamacpp_engine.py + rm build/cortex-nightly + rm build/cortex-beta + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_E2E }} + + - name: Pre-package + run: | + cd engine + make pre-package 
DESTINATION_BINARY_NAME="cortex" + + - name: Package + run: | + cd engine + make package + + - name: Upload Artifact + uses: actions/upload-artifact@v4 + with: + name: cortex-${{ matrix.os }}-${{ matrix.name }} + path: ./engine/cortex + + + - name: Upload linux vcpkg cache to s3 + continue-on-error: true + if: always() + run: | + aws s3 sync /home/runner/.cache/vcpkg s3://${{ secrets.MINIO_BUCKET_NAME }}/cortex-cpp-vcpkg-linux --endpoint ${{ secrets.MINIO_ENDPOINT }} + env: + AWS_ACCESS_KEY_ID: "${{ secrets.MINIO_ACCESS_KEY_ID }}" + AWS_SECRET_ACCESS_KEY: "${{ secrets.MINIO_SECRET_ACCESS_KEY }}" + AWS_DEFAULT_REGION: "${{ secrets.MINIO_REGION }}" diff --git a/engine/e2e-test/pytest.ini b/engine/e2e-test/pytest.ini new file mode 100644 index 000000000..0102b0a97 --- /dev/null +++ b/engine/e2e-test/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +asyncio_default_fixture_loop_scope = function diff --git a/engine/e2e-test/requirements.txt b/engine/e2e-test/requirements.txt index f0eabb974..05b47e0b0 100644 --- a/engine/e2e-test/requirements.txt +++ b/engine/e2e-test/requirements.txt @@ -2,3 +2,4 @@ websockets pytest pytest-asyncio requests +pyyaml \ No newline at end of file diff --git a/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py new file mode 100644 index 000000000..e13c4827a --- /dev/null +++ b/engine/e2e-test/test_api_cortexso_hub_llamacpp_engine.py @@ -0,0 +1,166 @@ +import pytest +import requests +import os +import yaml + +from pathlib import Path +from test_runner import ( + run, + start_server, + stop_server, + wait_for_websocket_download_success_event, +) + +collection_id = "cortexso/local-models-6683a6e29e8f3018845b16db" +token = os.getenv("HF_TOKEN") +if not token: + raise ValueError("HF_TOKEN environment variable not set") + +def get_repos_in_collection(collection_id, token): + # API endpoint to get list of repos in the collection + url = f"https://huggingface.co/api/collections/{collection_id}" + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, headers=headers) + + # Check response and retrieve repo IDs if successful + if response.status_code == 200: + return [repo['id'] for repo in response.json()["items"]] + else: + print("Error fetching repos:", response.status_code, response.json()) + return [] + +def get_repo_default_branch(repo_id, token): + # Direct link to metadata.yaml on the main branch + url = f"https://huggingface.co/{repo_id}/resolve/main/metadata.yml" + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, headers=headers) + + # Check response and retrieve the 'default' field value + if response.status_code == 200: + # Read YAML content from response text + metadata = yaml.safe_load(response.text) + return metadata.get("default") + else: + print(f"Error fetching metadata for {repo_id}:", response.status_code, response.json()) + return None + +def get_all_repos_and_default_branches_from_metadata(collection_id, token): + # Get list of repos from the collection + repos = get_repos_in_collection(collection_id, token) + combined_list = [] + + # Iterate over each repo and fetch the default branch from metadata + for repo_id in repos: + default_branch = get_repo_default_branch(repo_id, token) + if default_branch and "gguf" in default_branch: + combined_list.append(f"{repo_id.split('/')[1]}:{default_branch}") + + return combined_list + +#Call the function and print the results +repo_branches = get_all_repos_and_default_branches_from_metadata(collection_id, token) 
+ +class TestCortexsoModels: + + @pytest.fixture(autouse=True) + def setup_and_teardown(self, request): + # Setup + success = start_server() + if not success: + raise Exception("Failed to start server") + # Delete model if exists + for model_url in repo_branches: + run( + "Delete model", + [ + "models", + "delete", + model_url, + ], + ) + yield + + # Teardown + for model_url in repo_branches: + run( + "Delete model", + [ + "models", + "delete", + model_url, + ], + ) + stop_server() + + @pytest.mark.parametrize("model_url", repo_branches) + @pytest.mark.asyncio + async def test_models_on_cortexso_hub(self, model_url): + + # Pull model from cortexso hub + json_body = { + "model": model_url + } + response = requests.post("http://localhost:3928/models/pull", json=json_body) + assert response.status_code == 200, f"Failed to pull model: {model_url}" + + await wait_for_websocket_download_success_event(timeout=None) + + # Check if the model was pulled successfully + get_model_response = requests.get( + f"http://127.0.0.1:3928/models/{model_url}" + ) + assert get_model_response.status_code == 200, f"Failed to fetch model: {model_url}" + assert ( + get_model_response.json()["model"] == model_url + ), f"Unexpected model name for: {model_url}" + + # Check if the model is available in the list of models + response = requests.get("http://localhost:3928/models") + assert response.status_code == 200 + models = [i["id"] for i in response.json()["data"]] + assert model_url in models, f"Model not found in list: {model_url}" + + # Install Engine + exit_code, output, error = run( + "Install Engine", ["engines", "install", "llama-cpp"], timeout=None, capture = False + ) + root = Path.home() + assert os.path.exists(root / "cortexcpp" / "engines" / "cortex.llamacpp" / "version.txt") + assert exit_code == 0, f"Install engine failed with error: {error}" + + # Start the model + response = requests.post("http://localhost:3928/models/start", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + + # Send an inference request + inference_json_body = { + "frequency_penalty": 0.2, + "max_tokens": 4096, + "messages": [ + { + "content": "", + "role": "user" + } + ], + "model": model_url, + "presence_penalty": 0.6, + "stop": [ + "End" + ], + "stream": False, + "temperature": 0.8, + "top_p": 0.95 + } + response = requests.post("http://localhost:3928/v1/chat/completions", json=inference_json_body, headers={"Content-Type": "application/json"}) + assert response.status_code == 200, f"status_code: {response.status_code} response: {response.json()}" + + # Stop the model + response = requests.post("http://localhost:3928/models/stop", json=json_body) + assert response.status_code == 200, f"status_code: {response.status_code}" + + # Uninstall Engine + exit_code, output, error = run( + "Uninstall engine", ["engines", "uninstall", "llama-cpp"] + ) + assert "Engine llama-cpp uninstalled successfully!" 
in output + assert exit_code == 0, f"Install engine failed with error: {error}" From 322a0cf788506dbfc989e2e7402d1a0d9368822c Mon Sep 17 00:00:00 2001 From: hiento09 <136591877+hiento09@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:55:37 +0700 Subject: [PATCH 5/7] chore: notification discord add binary download url (#1607) Co-authored-by: Hien To --- .github/workflows/beta-build.yml | 3 +++ .github/workflows/template-noti-discord.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml index 1c9e49790..b5bee44b6 100644 --- a/.github/workflows/beta-build.yml +++ b/.github/workflows/beta-build.yml @@ -117,10 +117,13 @@ jobs: - Windows: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-windows-amd64.tar.gz - macOS Universal: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-mac-universal.tar.gz - Linux Deb: - Network Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb - Local Installer: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Binary: https://github.com/janhq/cortex.cpp/releases/download/v${{ env.VERSION }}/cortex-${{ env.VERSION }}-linux-amd64.tar.gz - Github Release: https://github.com/janhq/cortex.cpp/releases/tag/v${{ env.VERSION }} \ No newline at end of file diff --git a/.github/workflows/template-noti-discord.yaml b/.github/workflows/template-noti-discord.yaml index 673bd3da7..3752a2201 100644 --- a/.github/workflows/template-noti-discord.yaml +++ b/.github/workflows/template-noti-discord.yaml @@ -33,10 +33,13 @@ jobs: - Windows: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-${{ env.VERSION }}-windows-amd64-network-installer.exe - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-${{ env.VERSION }}-windows-amd64-local-installer.exe + - Binary: https://delta.jan.ai/cortex/v${{ env.VERSION }}/windows-amd64/cortex-nightly.tar.gz - macOS Universal: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-${{ env.VERSION }}-mac-universal-network-installer.pkg - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-${{ env.VERSION }}-mac-universal-local-installer.pkg + - Binary: https://delta.jan.ai/cortex/v${{ env.VERSION }}/mac-universal/cortex-nightly.tar.gz - Linux Deb: - Network Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-${{ env.VERSION }}-linux-amd64-network-installer.deb - Local Installer: https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-${{ env.VERSION }}-linux-amd64-local-installer.deb + - Binary: 
https://delta.jan.ai/cortex/v${{ env.VERSION }}/linux-amd64/cortex-nightly.tar.gz
             - Github action run: https://github.com/janhq/cortex.cpp/actions/runs/${{ env.RUNNER_ID }}

From 43fbc529abc9c3926702fd2766804b0bc7a5c04b Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Fri, 1 Nov 2024 16:13:06 +0700
Subject: [PATCH 6/7] chore: auto set pre-release for beta version (#1608)

Co-authored-by: Hien To
---
 .github/workflows/beta-build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/beta-build.yml b/.github/workflows/beta-build.yml
index b5bee44b6..0a4d3c735 100644
--- a/.github/workflows/beta-build.yml
+++ b/.github/workflows/beta-build.yml
@@ -90,6 +90,8 @@ jobs:
       pull-requests: write
     runs-on: ubuntu-latest
     steps:
+      - name: Getting the repo
+        uses: actions/checkout@v4
       - name: set release to prerelease
         run: |
           gh release edit v${{ needs.get-update-version.outputs.new_version }} --draft=false --prerelease

From 152b76fe8828fc8644119ba15aafa25eac044f0f Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Fri, 1 Nov 2024 17:58:12 +0700
Subject: [PATCH 7/7] fix: progress bar on CMD (#1609)

---
 engine/cli/utils/download_progress.cc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/engine/cli/utils/download_progress.cc b/engine/cli/utils/download_progress.cc
index 9c38d4bdf..b47b4fc9a 100644
--- a/engine/cli/utils/download_progress.cc
+++ b/engine/cli/utils/download_progress.cc
@@ -36,6 +36,20 @@
 
 bool DownloadProgress::Handle(const DownloadType& event_type) {
   assert(!!ws_);
+#if defined(_WIN32)
+  HANDLE h_out = GetStdHandle(STD_OUTPUT_HANDLE);
+  DWORD dw_original_out_mode = 0;
+  if (h_out != INVALID_HANDLE_VALUE) {
+    GetConsoleMode(h_out, &dw_original_out_mode);
+
+    // Enable ANSI escape code processing
+    DWORD dw_requested_out_mode =
+        dw_original_out_mode | ENABLE_VIRTUAL_TERMINAL_PROCESSING;
+    if (!SetConsoleMode(h_out, dw_requested_out_mode)) {
+      SetConsoleMode(h_out, dw_original_out_mode);
+    }
+  }
+#endif
   std::unordered_map<std::string, uint64_t> totals;
   status_ = DownloadStatus::DownloadStarted;
   std::unique_ptr<indicators::DynamicProgress<indicators::ProgressBar>> bars;
@@ -124,6 +138,11 @@
     ws_->dispatch(handle_message);
   }
   indicators::show_console_cursor(true);
+#if defined(_WIN32)
+  if (dw_original_out_mode != 0 && h_out != INVALID_HANDLE_VALUE) {
+    SetConsoleMode(h_out, dw_original_out_mode);
+  }
+#endif
   if (status_ == DownloadStatus::DownloadError)
     return false;
   return true;