From 3f249aa69b65d5ae53c53cff648b41d4bfb0d1bb Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Fri, 29 Nov 2024 14:05:21 +0000 Subject: [PATCH] NPUW Hotfixes: Memory and L0 pipeline (#27826) ### Details: - Keep tensors for decompression cut-off in a host-side closure, not in a lazy tensor - so they are not uploaded to the bank & detached after that - The upload led to 2x memory consumption and the subsequent crash - Relaxed requirements to enable the unfolded execution - so it may still happen if there are single-call functions that require DCOFF (previously, having those would reject this unfolded path) ### Tickets: - C-155523 (most likely related) @smirnov-alexey please take care of the release branch cherry-pick --- .../intel_npu/src/plugin/npuw/compiled_model.cpp | 5 +++-- .../src/plugin/npuw/partitioning/patterns/dcoff.cpp | 10 ++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index b9cdad9f4879db..f9573cb78f21ec 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -727,8 +727,9 @@ std::shared_ptr ov::npuw::CompiledModel::create_sync_infe const auto num_submodels = m_compiled_submodels.size(); for (std::size_t idx = 0u; idx < num_submodels; idx++) { const auto& comp_model_desc = m_compiled_submodels[idx]; - if (!comp_model_desc.replaced_by.has_value()) { - // not a funcall, do nothing + if (!comp_model_desc.replaced_by.has_value() || comp_model_desc.forced_to_fcall) { + // not a funcall, do nothing, or a subgraph that was forced to funcall + // (a 1-call function) - skip continue; } const auto real_idx = comp_model_desc.replaced_by.value(); diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp index 31093a34871db9..93a43c9b82570a 100644 --- 
a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp @@ -97,12 +97,10 @@ ClosureRemap build_remap(const Function& fbody, const DCOFFParams& params_to) { LOG_DEBUG("This is an OK parameter, will be kept"); m.closure_remap.push_back(i - fbody._param_offset); - // Check if unpack is indeed required - const auto& type = param->get_element_type(); - if (type == ov::element::i4 || type == ov::element::u4 || type == ov::element::i8 || - type == ov::element::u8) { - m.weights_to_unpack.insert(i - fbody._param_offset); - } + // FIXME: type should be queried from a lazy tensor + // and compared against param->get_element_type() + // to decide 100% + m.weights_to_unpack.insert(i - fbody._param_offset); } // Process zero points for parameters