From a4b72267f895725154ddeae277bc64604400a148 Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Tue, 31 Oct 2023 16:12:04 +0700
Subject: [PATCH 1/3] Add api to unload model

---
 controllers/llamaCPP.cc | 13 +++++++++++++
 controllers/llamaCPP.h  | 13 +++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 24e10d9be..cd167c326 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -344,3 +344,16 @@ void llamaCPP::loadModel(
   warmupModel();
   callback(resp);
 }
+
+void inferences::llamaCPP::unloadModel(const HttpRequestPtr &req, std::function<void(const HttpResponsePtr &)> &&callback)
+{
+  Json::Value jsonResp;
+  if (model_loaded) {
+    llama.unloadModel();
+    model_loaded = false;
+    jsonResp["message"] = "Model unloaded successfully";
+  }
+  jsonResp["message"] = "No model loaded";
+  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+  callback(resp);
+}
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index 2b188867a..e9e3a2ff8 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -260,6 +260,15 @@ struct llama_server_context {
     return true;
   }
 
+  void unloadModel() {
+    if (ctx != nullptr) {
+      llama_free(ctx);
+    }
+    if (model != nullptr) {
+      llama_free_model(model);
+    }
+  }
+
   std::vector<llama_token> tokenize(const json &json_prompt,
                                     bool add_bos) const {
     // If `add_bos` is true, we only add BOS, when json_prompt is a string,
@@ -1272,6 +1281,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
+  METHOD_ADD(llamaCPP::loadModel, "unloadmodel", Delete);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
@@ -1282,6 +1292,9 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
                       std::function<void(const HttpResponsePtr &)> &&callback);
   void warmupModel();
 
+  void unloadModel(const HttpRequestPtr &req,
+                   std::function<void(const HttpResponsePtr &)> &&callback);
+
  private:
   llama_server_context llama;
   bool model_loaded = false;

From 3d81378e5e4e7e310bf8c7aaa1d10e9c20560fb3 Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Tue, 31 Oct 2023 16:20:57 +0700
Subject: [PATCH 2/3] set context and model back to nullptr after delete

---
 controllers/llamaCPP.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index e9e3a2ff8..fbbc1842a 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -261,12 +261,10 @@ struct llama_server_context {
   }
 
   void unloadModel() {
-    if (ctx != nullptr) {
-      llama_free(ctx);
-    }
-    if (model != nullptr) {
-      llama_free_model(model);
-    }
+    llama_free(ctx);
+    llama_free_model(model);
+    ctx = nullptr;
+    model = nullptr;
   }
 
   std::vector<llama_token> tokenize(const json &json_prompt,

From af3bdb99ea090c04279ebc36a102a25dff8691be Mon Sep 17 00:00:00 2001
From: nhu anh thu
Date: Thu, 2 Nov 2023 18:12:43 +0700
Subject: [PATCH 3/3] fix wrong api endpoint - handler mapping

---
 controllers/llamaCPP.cc | 2 +-
 controllers/llamaCPP.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index cd167c326..49fc290ff 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -348,12 +348,12 @@ void llamaCPP::loadModel(
 void inferences::llamaCPP::unloadModel(const HttpRequestPtr &req, std::function<void(const HttpResponsePtr &)> &&callback)
 {
   Json::Value jsonResp;
+  jsonResp["message"] = "No model loaded";
   if (model_loaded) {
     llama.unloadModel();
     model_loaded = false;
     jsonResp["message"] = "Model unloaded successfully";
   }
-  jsonResp["message"] = "No model loaded";
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
   callback(resp);
 }
diff --git a/controllers/llamaCPP.h b/controllers/llamaCPP.h
index fbbc1842a..67b5d979c 100644
--- a/controllers/llamaCPP.h
+++ b/controllers/llamaCPP.h
@@ -1279,7 +1279,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
   METHOD_ADD(llamaCPP::embedding, "embedding", Post);
   METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
-  METHOD_ADD(llamaCPP::loadModel, "unloadmodel", Delete);
+  METHOD_ADD(llamaCPP::unloadModel, "unloadmodel", Delete);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,