From fc49eab13ee4aa288abb5a99e483a8c90fe38a83 Mon Sep 17 00:00:00 2001
From: tikikun
Date: Thu, 16 Nov 2023 17:19:15 +0700
Subject: [PATCH] feat: add warmup model back

---
 controllers/llamaCPP.cc | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 73e55bd01..415985a9f 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -96,18 +96,19 @@ std::string create_return_json(const std::string &id, const std::string &model,
 }
 
 void llamaCPP::warmupModel() {
-  // json pseudo;
-  //
-  // pseudo["prompt"] = "Hello";
-  // pseudo["n_predict"] = 10;
-  // const int task_id = llama.request_completion(pseudo, false);
-  // std::string completion_text;
-  // task_result result = llama.next_result(task_id);
-  // if (!result.error && result.stop) {
-  //   LOG_INFO << result.result_json.dump(-1, ' ', false,
-  //   json::error_handler_t::replace);
-  // }
-  // return;
+  json pseudo;
+
+  pseudo["prompt"] = "Hello";
+  pseudo["n_predict"] = 10;
+  pseudo["stream"] = false;
+  const int task_id = llama.request_completion(pseudo, false, false);
+  std::string completion_text;
+  task_result result = llama.next_result(task_id);
+  if (!result.error && result.stop) {
+    LOG_INFO << result.result_json.dump(-1, ' ', false,
+                                        json::error_handler_t::replace);
+  }
+  return;
 }
 
 void llamaCPP::chatCompletion(
@@ -365,10 +366,11 @@ void llamaCPP::loadModel(
   jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
   auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-  // warmupModel();
 
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
 
+  warmupModel();
+
   callback(resp);
 }
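
For readers outside this codebase, the sketch below illustrates the warm-up pattern the patch restores: push one tiny throwaway prompt through the inference path as soon as the model is loaded, so the first real request does not pay the cold-start cost. This is a minimal, self-contained sketch only; the FakeEngine type, its request_completion/next_result signatures, and the warmup helper are hypothetical stand-ins, not the actual llama.cpp or Nitro interfaces used in the diff.

// warmup_sketch.cpp -- hypothetical stand-ins, not the real llama.cpp/Nitro API
#include <iostream>
#include <string>

// Stand-in for the inference engine: queues a completion request and
// returns its result. In the patch this role is played by the llama.cpp
// server object held by llamaCPP.
struct FakeEngine {
  int request_completion(const std::string &prompt, int n_predict) {
    last_prompt = prompt;
    last_n_predict = n_predict;
    return 0;  // task id
  }
  std::string next_result(int /*task_id*/) {
    // Pretend the engine produced a short continuation of the prompt.
    return last_prompt + " world";
  }
  std::string last_prompt;
  int last_n_predict = 0;
};

// Warm-up: run one tiny, synchronous completion right after the model loads
// so the engine is exercised once before the first user request arrives.
void warmup(FakeEngine &engine) {
  const int task_id = engine.request_completion(/*prompt=*/"Hello",
                                                /*n_predict=*/10);
  const std::string result = engine.next_result(task_id);
  std::cout << "warm-up completion: " << result << '\n';
}

int main() {
  FakeEngine engine;
  // In the patch, loadModel() starts the background task and then calls
  // warmupModel(); here we simply call the sketch directly.
  warmup(engine);
  return 0;
}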