diff --git a/src/llama_server_context.cc b/src/llama_server_context.cc index 6caec8f7..df16551b 100644 --- a/src/llama_server_context.cc +++ b/src/llama_server_context.cc @@ -796,14 +796,26 @@ bool LlamaServerContext::ProcessImages(LlamaClientSlot& slot) const { return slot.images.size() > 0; } void LlamaServerContext::SendError(TaskServer& task, std::string error) { - std::unique_lock lock(mutex_results); + SendError(task.id, task.multitask_id, error); +} + +void LlamaServerContext::SendError(LlamaClientSlot& slot, + const std::string& error) { + SendError(slot.task_id, slot.multitask_id, error); +} + +void LlamaServerContext::SendError(int id_task, int id_multi, + const std::string& error) { TaskResult res; - res.id = task.id; - res.multitask_id = task.multitask_id; + res.id = id_task; + res.multitask_id = id_multi; res.stop = false; res.error = true; res.result_json = {{"content", error}}; - queue_results.push_back(res); + { + std::lock_guard lock(mutex_results); + queue_results.push_back(res); + } condition_results.notify_all(); } @@ -1520,6 +1532,10 @@ bool LlamaServerContext::UpdateSlots() { if (has_images && !IngestImages(slot, n_batch)) { LOG_WARN << "failed processing images"; + slot.state = SlotState::kProcessing; + slot.command = SlotCommand::kNone; + slot.Release(); + SendError(slot, "Failed processing images"); return false; } @@ -1581,9 +1597,9 @@ bool LlamaServerContext::UpdateSlots() { slot.state = SlotState::kProcessing; slot.command = SlotCommand::kNone; slot.Release(); - // SendError(slot, - // "Input prompt is too big compared to KV size. Please " - // "try increasing KV size."); + SendError(slot, + "Input prompt is too big compared to KV size. Please " + "try increasing KV size."); } break; // break loop of n_batch } diff --git a/src/llama_server_context.h b/src/llama_server_context.h index 38b7163f..6fd12095 100644 --- a/src/llama_server_context.h +++ b/src/llama_server_context.h @@ -182,6 +182,8 @@ struct LlamaServerContext { bool ProcessImages(LlamaClientSlot& slot) const; void SendError(TaskServer& task, std::string error); + void SendError(LlamaClientSlot& slot, const std::string& error); + void SendError(int id_task, int id_multi, const std::string& error); void AddMultiTask(int id, std::vector& sub_ids);