From c773409c9f37b311808ab19e1df7b8f16402d3f8 Mon Sep 17 00:00:00 2001
From: vansangpfiev
Date: Wed, 4 Dec 2024 13:00:31 +0700
Subject: [PATCH] fix: guard max ctx_len

---
 engine/services/model_service.cc | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc
index ee034e8ef..3cfff5cb2 100644
--- a/engine/services/model_service.cc
+++ b/engine/services/model_service.cc
@@ -702,6 +702,8 @@ cpp::result ModelService::StartModel(
   config::YamlHandler yaml_handler;
   try {
+    constexpr const int kDefautlContextLength = 8192;
+    int max_model_context_length = kDefautlContextLength;
     Json::Value json_data;
     // Currently we don't support download vision models, so we need to bypass check
     if (!params_override.bypass_model_check()) {
@@ -732,7 +734,8 @@ cpp::result ModelService::StartModel(
       json_data["system_prompt"] = mc.system_template;
       json_data["user_prompt"] = mc.user_template;
       json_data["ai_prompt"] = mc.ai_template;
-      json_data["ctx_len"] = std::min(8192, mc.ctx_len);
+      json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
+      max_model_context_length = mc.ctx_len;
     } else {
       bypass_stop_check_set_.insert(model_handle);
     }
@@ -754,12 +757,14 @@ cpp::result ModelService::StartModel(
     ASSIGN_IF_PRESENT(json_data, params_override, cache_enabled);
     ASSIGN_IF_PRESENT(json_data, params_override, ngl);
     ASSIGN_IF_PRESENT(json_data, params_override, n_parallel);
-    ASSIGN_IF_PRESENT(json_data, params_override, ctx_len);
     ASSIGN_IF_PRESENT(json_data, params_override, cache_type);
     ASSIGN_IF_PRESENT(json_data, params_override, mmproj);
     ASSIGN_IF_PRESENT(json_data, params_override, model_path);
 #undef ASSIGN_IF_PRESENT
-
+    if (params_override.ctx_len) {
+      json_data["ctx_len"] =
+          std::min(params_override.ctx_len.value(), max_model_context_length);
+    }
     CTL_INF(json_data.toStyledString());
     auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
                                              json_data["ngl"].asInt(),
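
For reference, the guard this patch introduces can be sketched in isolation. The snippet below is a minimal, hypothetical illustration, not code from the patch: ResolveCtxLen and its parameters are invented names, the 8192 default mirrors kDefautlContextLength in model_service.cc, and the model check is assumed not to be bypassed. It shows the intended behavior: without an override, ctx_len is capped at min(default, model ctx_len); with an override, the value is honored but clamped to the model's own maximum.

#include <algorithm>
#include <cassert>
#include <optional>

// Assumed default, mirroring kDefautlContextLength in model_service.cc.
constexpr int kDefaultContextLength = 8192;

// Hypothetical helper: returns the ctx_len that would be passed to the engine.
int ResolveCtxLen(int model_ctx_len, std::optional<int> override_ctx_len) {
  // No override: cap at the default so large-context models stay bounded.
  int ctx_len = std::min(kDefaultContextLength, model_ctx_len);
  // Explicit override: honor it, but never exceed the model's own maximum.
  if (override_ctx_len) {
    ctx_len = std::min(override_ctx_len.value(), model_ctx_len);
  }
  return ctx_len;
}

int main() {
  assert(ResolveCtxLen(32768, std::nullopt) == 8192);  // capped at default
  assert(ResolveCtxLen(4096, std::nullopt) == 4096);   // small model unchanged
  assert(ResolveCtxLen(32768, 16384) == 16384);        // override within range
  assert(ResolveCtxLen(32768, 65536) == 32768);        // override guarded by model max
  return 0;
}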