fix: add n_ubatch to params (#30)

Co-authored-by: vansangpfiev <[email protected]>
janhq · May 20, 2024 · bf485f2
1 parent a7a04cc
commit bf485f2
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/src/llama_engine.cc b/src/llama_engine.cc
@@ -302,6 +302,7 @@ bool LlamaEngine::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
     model_type = jsonBody->get("model_type", "llm").asString();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
     params.n_batch = jsonBody->get("n_batch", 512).asInt();
+    params.n_ubatch = jsonBody->get("n_ubatch", params.n_batch).asInt();
     params.n_parallel = jsonBody->get("n_parallel", 1).asInt();
     params.n_threads =
         jsonBody->get("cpu_threads", std::thread::hardware_concurrency())
@@ -710,8 +711,8 @@ bool LlamaEngine::CheckModelLoaded(
   if (auto si = server_map_.find(model_id);
       si == server_map_.end() || !si->second.ctx.model_loaded_external) {
     LOG_WARN << "Error: model_id: " << model_id
-              << ", existed: " << (si != server_map_.end())
-              << ", loaded: " << false;
+             << ", existed: " << (si != server_map_.end())
+             << ", loaded: " << false;
     Json::Value jsonResp;
     jsonResp["message"] =
         "Model has not been loaded, please load model into nitro";