diff --git a/src/llama_engine.cc b/src/llama_engine.cc
index c639f439..4045681b 100644
--- a/src/llama_engine.cc
+++ b/src/llama_engine.cc
@@ -302,6 +302,7 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   model_type = jsonBody->get("model_type", "llm").asString();
   // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
   params.n_batch = jsonBody->get("n_batch", 512).asInt();
+  params.n_ubatch = jsonBody->get("n_ubatch", params.n_batch).asInt();
   params.n_parallel = jsonBody->get("n_parallel", 1).asInt();
   params.n_threads =
       jsonBody->get("cpu_threads", std::thread::hardware_concurrency())
@@ -710,8 +711,8 @@ bool LlamaEngine::CheckModelLoaded(
   if (auto si = server_map_.find(model_id);
       si == server_map_.end() || !si->second.ctx.model_loaded_external) {
     LOG_WARN << "Error: model_id: " << model_id
-             << ", existed: " << (si != server_map_.end())
-             << ", loaded: " << false;
+             << ", existed: " << (si != server_map_.end())
+             << ", loaded: " << false;
     Json::Value jsonResp;
     jsonResp["message"] =
         "Model has not been loaded, please load model into nitro";
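
The new `n_ubatch` line defaults to the value just parsed for `n_batch`, so the parsing order matters: a request that sets only `n_batch` gets a matching micro-batch size rather than the hard-coded 512. A minimal sketch of that fallback, using jsoncpp as the diff does (the standalone `main` harness is purely illustrative, not engine code):

```cpp
#include <json/json.h>  // jsoncpp, the JSON library used in the diff

#include <iostream>
#include <memory>

int main() {
  // Request body that sets n_batch but omits n_ubatch.
  auto jsonBody = std::make_shared<Json::Value>();
  (*jsonBody)["n_batch"] = 1024;

  // Mirrors the parsing order in LoadModelImpl: n_batch is resolved first,
  // then n_ubatch falls back to the *resolved* n_batch, not to 512.
  int n_batch = jsonBody->get("n_batch", 512).asInt();
  int n_ubatch = jsonBody->get("n_ubatch", n_batch).asInt();

  std::cout << "n_batch=" << n_batch      // prints n_batch=1024
            << " n_ubatch=" << n_ubatch   // prints n_ubatch=1024
            << "\n";
}
```

In llama.cpp terms, `n_batch` is the logical batch size and `n_ubatch` the physical micro-batch actually submitted to the backend, so tying the default to `n_batch` preserves prior behavior for requests that never mention `n_ubatch`.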