From a4b72267f895725154ddeae277bc64604400a148 Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Tue, 31 Oct 2023 16:12:04 +0700
Subject: [PATCH 1/3] Add api to unload model

---
 controllers/llamaCPP.cc | 13 +++++++++++++
 controllers/llamaCPP.h  | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 24e10d9be..cd167c326 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -344,3 +344,16 @@ void llamaCPP::loadModel(
   warmupModel();
   callback(resp);
 }
+
+void inferences::llamaCPP::unloadModel(const HttpRequestPtr &req, std::function<void(const HttpResponsePtr &)> &&callback)
+{
+  Json::Value jsonResp;
+  if (model_loaded) {
+    llama.unloadModel();
+    model_loaded = false;
+    jsonResp["message"] = "Model unloaded successfully";
+  }
+  jsonResp["message"] = "No model loaded";
+  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+  callback(resp);
+}
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 2b188867a..e9e3a2ff8 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -260,6 +260,15 @@ struct llama_server_context {
     return true;
   }
 
+  void unloadModel() {
+    if (ctx != nullptr) {
+      llama_free(ctx);
+    }
+    if (model != nullptr) {
+      llama_free_model(model);
+    }
+  }
+
   std::vector<llama_token> tokenize(const json &json_prompt,
                                     bool add_bos) const {
     // If `add_bos` is true, we only add BOS, when json_prompt is a string,
@@ -1272,6 +1281,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
+  METHOD_ADD(llamaCPP::loadModel, "unloadmodel", Delete);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
@@ -1282,6 +1292,9 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
                       std::function<void(const HttpResponsePtr &)> &&callback);
   void warmupModel();
 
+  void unloadModel(const HttpRequestPtr &req,
+                   std::function<void(const HttpResponsePtr &)> &&callback);
+
  private:
   llama_server_context llama;
   bool model_loaded = false;

From 3d81378e5e4e7e310bf8c7aaa1d10e9c20560fb3 Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Tue, 31 Oct 2023 16:20:57 +0700
Subject: [PATCH 2/3] set context and model back to nullptr after delete

---
 controllers/llamaCPP.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index e9e3a2ff8..fbbc1842a 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -261,12 +261,10 @@ struct llama_server_context {
   }
 
   void unloadModel() {
-    if (ctx != nullptr) {
-      llama_free(ctx);
-    }
-    if (model != nullptr) {
-      llama_free_model(model);
-    }
+    llama_free(ctx);
+    llama_free_model(model);
+    ctx = nullptr;
+    model = nullptr;
   }
 
   std::vector<llama_token> tokenize(const json &json_prompt,

From af3bdb99ea090c04279ebc36a102a25dff8691be Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Thu, 2 Nov 2023 18:12:43 +0700
Subject: [PATCH 3/3] fix wrong api endpoint - handler mapping

---
 controllers/llamaCPP.cc | 2 +-
 controllers/llamaCPP.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index cd167c326..49fc290ff 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -348,12 +348,12 @@ void llamaCPP::loadModel(
 void inferences::llamaCPP::unloadModel(const HttpRequestPtr &req, std::function<void(const HttpResponsePtr &)> &&callback)
 {
   Json::Value jsonResp;
+  jsonResp["message"] = "No model loaded";
   if (model_loaded) {
     llama.unloadModel();
     model_loaded = false;
     jsonResp["message"] = "Model unloaded successfully";
   }
-  jsonResp["message"] = "No model loaded";
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
   callback(resp);
 }
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index fbbc1842a..67b5d979c 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1279,7 +1279,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
-  METHOD_ADD(llamaCPP::loadModel, "unloadmodel", Delete);
+  METHOD_ADD(llamaCPP::unloadModel, "unloadmodel", Delete);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,