From fc49eab13ee4aa288abb5a99e483a8c90fe38a83 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Thu, 16 Nov 2023 17:19:15 +0700
Subject: [PATCH] feat: add warmup model back

---
 controllers/llamaCPP.cc | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 73e55bd01..415985a9f 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -96,18 +96,19 @@ std::string create_return_json(const std::string &id, const std::string &model,
 }
 
 void llamaCPP::warmupModel() {
-  // json pseudo;
-  //
-  // pseudo["prompt"] = "Hello";
-  // pseudo["n_predict"] = 10;
-  // const int task_id = llama.request_completion(pseudo, false);
-  // std::string completion_text;
-  // task_result result = llama.next_result(task_id);
-  // if (!result.error && result.stop) {
-  //   LOG_INFO << result.result_json.dump(-1, ' ', false,
-  //   json::error_handler_t::replace);
-  // }
-  // return;
+  json pseudo;
+
+  pseudo["prompt"] = "Hello";
+  pseudo["n_predict"] = 10;
+  pseudo["stream"] = false;
+  const int task_id = llama.request_completion(pseudo, false, false);
+  std::string completion_text;
+  task_result result = llama.next_result(task_id);
+  if (!result.error && result.stop) {
+    LOG_INFO << result.result_json.dump(-1, ' ', false,
+                                        json::error_handler_t::replace);
+  }
+  return;
 }
 
 void llamaCPP::chatCompletion(
@@ -365,10 +366,11 @@ void llamaCPP::loadModel(
   jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-  // warmupModel();
 
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
 
+  warmupModel();
+
   callback(resp);
 }
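
For readers outside this codebase, the sketch below illustrates the warm-up pattern the patch restores: push one tiny throwaway prompt through the inference path as soon as the model is loaded, so the first real request does not pay the cold-start cost. This is a minimal, self-contained sketch only; the FakeEngine type, its request_completion/next_result signatures, and the warmup helper are hypothetical stand-ins, not the actual llama.cpp or Nitro interfaces used in the diff.

// warmup_sketch.cpp -- hypothetical stand-ins, not the real llama.cpp/Nitro API
#include <iostream>
#include <string>

// Stand-in for the inference engine: queues a completion request and
// returns its result. In the patch this role is played by the llama.cpp
// server object held by llamaCPP.
struct FakeEngine {
  int request_completion(const std::string &prompt, int n_predict) {
    last_prompt = prompt;
    last_n_predict = n_predict;
    return 0;  // task id
  }
  std::string next_result(int /*task_id*/) {
    // Pretend the engine produced a short continuation of the prompt.
    return last_prompt + " world";
  }
  std::string last_prompt;
  int last_n_predict = 0;
};

// Warm-up: run one tiny, synchronous completion right after the model loads
// so the engine is exercised once before the first user request arrives.
void warmup(FakeEngine &engine) {
  const int task_id = engine.request_completion(/*prompt=*/"Hello",
                                                /*n_predict=*/10);
  const std::string result = engine.next_result(task_id);
  std::cout << "warm-up completion: " << result << '\n';
}

int main() {
  FakeEngine engine;
  // In the patch, loadModel() starts the background task and then calls
  // warmupModel(); here we simply call the sketch directly.
  warmup(engine);
  return 0;
}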