Skip to content

Commit

Permalink
fix: guard max ctx_len
Browse files: browse the repository at this point in the history
  • Loading branch information
vansangpfiev committed Dec 4, 2024
1 parent e290305 commit c773409
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions engine/services/model_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
config::YamlHandler yaml_handler;

try {
constexpr const int kDefautlContextLength = 8192;
int max_model_context_length = kDefautlContextLength;
Json::Value json_data;
// Downloading vision models is not supported yet, so the model check must be bypassed for them
if (!params_override.bypass_model_check()) {
Expand Down Expand Up @@ -732,7 +734,8 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
json_data["system_prompt"] = mc.system_template;
json_data["user_prompt"] = mc.user_template;
json_data["ai_prompt"] = mc.ai_template;
json_data["ctx_len"] = std::min(8192, mc.ctx_len);
json_data["ctx_len"] = std::min(kDefautlContextLength, mc.ctx_len);
max_model_context_length = mc.ctx_len;
} else {
bypass_stop_check_set_.insert(model_handle);
}
Expand All @@ -754,12 +757,14 @@ cpp::result<StartModelResult, std::string> ModelService::StartModel(
ASSIGN_IF_PRESENT(json_data, params_override, cache_enabled);
ASSIGN_IF_PRESENT(json_data, params_override, ngl);
ASSIGN_IF_PRESENT(json_data, params_override, n_parallel);
ASSIGN_IF_PRESENT(json_data, params_override, ctx_len);
ASSIGN_IF_PRESENT(json_data, params_override, cache_type);
ASSIGN_IF_PRESENT(json_data, params_override, mmproj);
ASSIGN_IF_PRESENT(json_data, params_override, model_path);
#undef ASSIGN_IF_PRESENT

if (params_override.ctx_len) {
json_data["ctx_len"] =
std::min(params_override.ctx_len.value(), max_model_context_length);
}
CTL_INF(json_data.toStyledString());
auto may_fallback_res = MayFallbackToCpu(json_data["model_path"].asString(),
json_data["ngl"].asInt(),
Expand Down

0 comments on commit c773409

Please sign in to comment.