From d47322d302879e09568bf745ac1198cddea5cb4d Mon Sep 17 00:00:00 2001
From: tikikun
Date: Wed, 22 Nov 2023 06:48:55 +0700
Subject: [PATCH] hotfix: caching

---
 controllers/llamaCPP.cc | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
index 429bdab44..881b9632c 100644
--- a/controllers/llamaCPP.cc
+++ b/controllers/llamaCPP.cc
@@ -157,6 +157,10 @@ void llamaCPP::chatCompletion(
 
   // To set default value
   if (jsonBody) {
+    // Default values to enable auto caching
+    data["cache_prompt"] = true;
+    data["n_keep"] = -1;
+
     data["stream"] = (*jsonBody).get("stream", false).asBool();
     data["n_predict"] = (*jsonBody).get("max_tokens", 500).asInt();
     data["top_p"] = (*jsonBody).get("top_p", 0.95).asFloat();
@@ -164,7 +168,6 @@ void llamaCPP::chatCompletion(
     data["frequency_penalty"] =
         (*jsonBody).get("frequency_penalty", 0).asFloat();
     data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
-    data["cache_prompt"] = true;
     const Json::Value &messages = (*jsonBody)["messages"];
     for (const auto &message : messages) {
      std::string input_role = message["role"].asString();
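
Net effect of the change, shown as a minimal standalone sketch (it assumes jsoncpp, which the controller already uses; the helper name buildChatCompletionData is hypothetical and not part of llamaCPP.cc): cache_prompt and n_keep are now always set by the server, while the remaining fields keep their client-overridable defaults.

#include <json/json.h>

// Hypothetical helper illustrating the post-patch defaults; not the actual
// controller code.
Json::Value buildChatCompletionData(const Json::Value &jsonBody) {
  Json::Value data;
  // Always set by the server so llama.cpp can reuse the KV cache between
  // requests that share a prompt prefix.
  data["cache_prompt"] = true;
  // -1 asks llama.cpp to keep the whole prompt when the context window slides.
  data["n_keep"] = -1;
  // Still overridable through the request body, exactly as before the patch.
  data["stream"] = jsonBody.get("stream", false).asBool();
  data["n_predict"] = jsonBody.get("max_tokens", 500).asInt();
  data["top_p"] = jsonBody.get("top_p", 0.95).asFloat();
  return data;
}

Because cache_prompt is no longer read from the request body, a client cannot disable caching per request once this patch is applied.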