From f430ead1c28c198fd445b6d7027a01e8e228a024 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Tue, 22 Oct 2024 11:04:43 +0700
Subject: [PATCH] fix: warm-up (#259)

* fix: warm-up

* fix: n_predict 10

* Revert "fix: n_predict 10"

This reverts commit d08e95289773753c23c25630efa1942754e0c3ac.

* revert: CI

---------

Co-authored-by: vansangpfiev
---
 src/llama_engine.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index bfcdb32..06f2874 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -978,6 +978,8 @@ void LlamaEngine::WarmUpModel(const std::string& model_id) {
     pseudo["prompt"] = "Hello";
     pseudo["n_predict"] = 2;
     pseudo["stream"] = false;
+    pseudo["cache_prompt"] = server_map_[model_id].caching_enabled;
+    pseudo["n_keep"] = 0;
     const int task_id =
         si->second.ctx.RequestCompletion(pseudo, false, false, -1);
     TaskResult result = si->second.ctx.NextResult(task_id);