From d33ada4bd9e18cd01f56dce372d6d294c166c137 Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Fri, 29 Nov 2024 12:26:27 +0000
Subject: [PATCH] [NPUW] LazyTensor refactoring (#27798)

Mirror of https://github.com/openvinotoolkit/openvino/pull/27108
---
 .../intel_npu/src/plugin/npuw/lazy_tensor.cpp | 398 ++++++++----------
 .../intel_npu/src/plugin/npuw/lazy_tensor.hpp |  32 +-
 .../plugin/npuw/partitioning/partitioning.cpp |  27 +-
 .../npuw/partitioning/patterns/dcoff.cpp      |   8 +-
 4 files changed, 208 insertions(+), 257 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
index 8a0317a9f714e8..81521222ae6fae 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -4,41 +4,166 @@
 
 #include "lazy_tensor.hpp"
 
-using ov::npuw::weights::ConcatMeta;
-using ov::npuw::weights::ConstPtr;
+#include <memory>
+#include <tuple>
+#include <variant>
+
+#include "logging.hpp"
+#include "openvino/runtime/make_tensor.hpp"
+#include "util.hpp"
+
 using ov::npuw::weights::LazyTensor;
-using ov::npuw::weights::OrigData;
-using ov::npuw::weights::Transform;
-using ov::npuw::weights::TransformType;
-using ov::npuw::weights::UnpackMeta;
 
 namespace ov {
 namespace npuw {
 namespace weights {
+namespace op {
+struct Const {
+    std::shared_ptr<ov::op::v0::Constant> node;
+
+    std::size_t hash() const {
+        std::size_t seed = std::hash<const void*>()(node->get_data_ptr()) + 0x9e3779b9;
+        seed ^= node->get_element_type().hash() + 0x9e3779b9;
+        for (const auto& dim : node->get_shape()) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Const& other) const {
+        return (node->get_shape() == other.node->get_shape() &&
+                node->get_element_type() == other.node->get_element_type() &&
+                node->get_data_ptr() == other.node->get_data_ptr());
+    }
+    ov::Tensor eval() const {
+        return ov::npuw::util::tensor_from_const(node);
+    }
+};
+struct Concat {
+    std::vector<LazyTensor> tensors;
+    std::size_t axis;
+
+    std::size_t hash() const {
+        std::size_t seed = std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        for (auto& lt : tensors) {
+            seed ^= lt.get_hash() + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Concat& other) const {
+        return (axis == other.axis && tensors == other.tensors);
+    }
+    ov::Tensor eval() const {
+        std::vector<ov::Tensor> to_concat;
+        for (const auto& lt : tensors) {
+            to_concat.push_back(lt.eval());
+        }
+        return ov::npuw::util::concat(to_concat, axis);
+    }
+};
+
+struct Unpack {
+    LazyTensor w, z, s;
+    ov::element::Type type;
+    ov::Shape shape;
+
+    std::size_t hash() const {
+        std::size_t seed = w.get_hash() + 0x9e3779b9;
+        seed ^= z.get_hash() + 0x9e3779b9;
+        seed ^= s.get_hash() + 0x9e3779b9;
+        seed ^= type.hash() + 0x9e3779b9;
+        for (const auto& dim : shape) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Unpack& other) const {
+        return (type == other.type && shape == other.shape && w == other.w && z == other.z && s == other.s);
+    }
+    ov::Tensor eval() const {
+        const auto& gti = ov::get_tensor_impl;
+        const auto& tw = w.eval();
+        const auto& tz = z.eval();
+        const auto& ts = s.eval();
+        NPUW_ASSERT(tw);
+        ov::Tensor dst(type, shape);
+        if (tw && tz && ts) {
+            ov::npuw::util::unpack(gti(tw), gti(tz), gti(ts), gti(dst));
+        } else if (tw && ts) {
+            ov::npuw::util::unpack(gti(tw), gti(ts), gti(dst));
+        } else {
+            NPUW_ASSERT(false && "Unsupported combination");
+        }
+        return dst;
+    }
+};
+struct Permute {
+    LazyTensor tensor;
+    std::vector<std::size_t> axes;
+
+    std::size_t hash() const {
+        std::size_t seed = tensor.get_hash() + 0x9e3779b9;
+        for (const auto& axis : axes) {
+            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Permute& other) const {
+        return (axes == other.axes && tensor == other.tensor);
+    }
+    ov::Tensor eval() const {
+        return ov::npuw::util::permute(tensor.eval(), axes);
+    }
+};
+struct Convert {
+    LazyTensor tensor;
+    ov::element::Type type;
+
+    std::size_t hash() const {
+        std::size_t seed = type.hash() + 0x9e3779b9;
+        seed ^= tensor.get_hash() + 0x9e3779b9;
+        return seed;
+    }
+    bool operator==(const Convert& other) const {
+        return (type == other.type && tensor == other.tensor);
+    }
+    ov::Tensor eval() const {
+        NPUW_ASSERT(ov::element::f16 == type);
+        return ov::npuw::util::to_f16(tensor.eval());
+    }
+};
+}  // namespace op
+
+using Transform = std::variant<op::Const, op::Concat, op::Unpack, op::Permute, op::Convert>;
 
 struct LazyTensorImpl {
 public:
     LazyTensorImpl() = default;
-    LazyTensorImpl(const TransformType& type, const Transform& transform);
-
-    bool operator==(const LazyTensorImpl& other) const;
+    explicit LazyTensorImpl(Transform&& t);
 
     ov::Tensor eval() const;
-    ov::Tensor get_orig_tensor() const;
-
+    bool operator==(const LazyTensorImpl& other) const;
     std::size_t get_hash() const;
 
-    bool has_transformations() const;
-
-    std::shared_ptr<LazyTensorImpl> m_parent = nullptr;
-    std::pair<TransformType, Transform> m_transform;
+    Transform m_transform;
     std::size_t m_hash = 0;
+};
+
+}  // namespace weights
+}  // namespace npuw
+}  // namespace ov
+
+using namespace ov::npuw::weights::op;
+using ov::npuw::weights::LazyTensorImpl;
+using ov::npuw::weights::Transform;
 
-    void* m_orig_data = nullptr;
-    ov::Shape m_orig_shape;
-    ov::element::Type m_orig_type;
+// std::visit helper
+template <class... Ts>
+struct overloaded : Ts... {
+    using Ts::operator()...;
 };
+template <class... Ts>
+overloaded(Ts...) -> overloaded<Ts...>;
 
 std::size_t LazyTensorImpl::get_hash() const {
     // Already calculated
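Each op:: struct in the hunk above implements the same informal interface — hash(), operator==, and eval() — which is what lets LazyTensorImpl treat all five alternatives uniformly through std::variant. The hash() members all mix fields into a running seed with the golden-ratio constant 0x9e3779b9, the same idiom as boost::hash_combine. A minimal standalone sketch of that idiom (illustrative helper names, not part of the patch):

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Mix one more value into the running seed; 0x9e3779b9 is the 32-bit
    // golden-ratio constant used to spread the bits, as in boost::hash_combine.
    std::size_t combine(std::size_t seed, std::size_t value) {
        return seed ^ (std::hash<std::size_t>()(value) + 0x9e3779b9);
    }

    std::size_t hash_dims(const std::vector<std::size_t>& dims) {
        std::size_t seed = 0;
        for (const auto& d : dims) {
            seed = combine(seed, d);
        }
        return seed;
    }
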
@@ -46,120 +171,23 @@ std::size_t LazyTensorImpl::get_hash() const {
         return m_hash;
     }
 
-    // Get parent's hash
+    // Get hash
     std::size_t seed = 0;
-    if (m_parent) {
-        seed = m_parent->get_hash();
-    } else {
-        seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
-        for (const auto& dim : m_orig_shape) {
-            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
-        }
-        seed ^= m_orig_type.hash() + 0x9e3779b9;
-    }
-
-    // Combine with this hash
-    seed ^= std::hash<int>()(static_cast<int>(m_transform.first)) + 0x9e3779b9;
-    if (m_transform.first == TransformType::PERMUTE) {
-        const auto& axes = std::get<std::vector<std::size_t>>(m_transform.second);
-        for (const auto& axis : axes) {
-            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-        }
-    } else if (m_transform.first == TransformType::CONCAT) {
-        const auto& axis = std::get<ConcatMeta>(m_transform.second).second;
-        seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-        for (auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
-            seed ^= lt.get_hash() + 0x9e3779b9;
-        }
-    } else if (m_transform.first == TransformType::UNPACK) {
-        const auto& unpack_meta = std::get<UnpackMeta>(m_transform.second);
-        seed ^= std::get<0>(unpack_meta).get_hash() + 0x9e3779b9;
-        seed ^= std::get<1>(unpack_meta).get_hash() + 0x9e3779b9;
-        seed ^= std::get<2>(unpack_meta).get_hash() + 0x9e3779b9;
-        for (const auto& dim : std::get<3>(unpack_meta)) {
-            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
-        }
-        seed ^= std::get<4>(unpack_meta).hash() + 0x9e3779b9;
-    }
+    std::visit(overloaded{[&seed](const auto& op) {
+                   seed ^= op.hash();
+               }},
+               m_transform);
 
     return seed;
 }
 
-}  // namespace weights
-}  // namespace npuw
-}  // namespace ov
-
-using ov::npuw::weights::LazyTensorImpl;
-
-LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
-    if (type == TransformType::THIS && std::holds_alternative<OrigData>(transform)) {
-        m_transform = std::make_pair(type, transform);
-        ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<OrigData>(transform))) {
-            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(transform)));
-        } else {
-            tensor = std::get<ov::Tensor>(std::get<OrigData>(transform));
-            if (!tensor) {
-                // Don't set anything
-                return;
-            }
-        }
-        m_orig_data = tensor.data();
-        m_orig_shape = tensor.get_shape();
-        m_orig_type = tensor.get_element_type();
-    } else if (type == TransformType::CONCAT && std::holds_alternative<ConcatMeta>(transform)) {
-        m_transform = std::make_pair(type, transform);
-    } else if (type == TransformType::UNPACK && std::holds_alternative<UnpackMeta>(transform)) {
-        m_transform = std::make_pair(type, transform);
-    } else {
-        NPUW_ASSERT(false);
-    }
+LazyTensorImpl::LazyTensorImpl(Transform&& t) {
+    m_transform = std::move(t);
     m_hash = get_hash();
 }
 
 bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
-    if (m_hash != other.m_hash || m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape ||
-        m_orig_type != other.m_orig_type || m_transform.first != other.m_transform.first) {
-        return false;
-    }
-
-    switch (m_transform.first) {
-    case TransformType::THIS:
-        // everything is already compared above - skip
-        break;
-    case TransformType::CONVERT:
-        // everything is already compared above - skip
-        break;
-    case TransformType::PERMUTE:
-        if (std::get<std::vector<std::size_t>>(m_transform.second) !=
-            std::get<std::vector<std::size_t>>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    case TransformType::CONCAT:
-        if (std::get<ConcatMeta>(m_transform.second) != std::get<ConcatMeta>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    case TransformType::UNPACK:
-        if (std::get<UnpackMeta>(m_transform.second) != std::get<UnpackMeta>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    default:
-        NPUW_ASSERT(false);
-        break;
-    }
-
-    if ((m_parent && !other.m_parent) || (!m_parent && other.m_parent)) {
-        return false;
-    }
-
-    if (m_parent && other.m_parent) {
-        return *m_parent.get() == *other.m_parent.get();
-    }
-
-    return true;
+    return m_hash == other.m_hash && m_transform == other.m_transform;
 }
 
 ov::Tensor LazyTensorImpl::eval() const {
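The hunk above collapses the old per-TransformType if/else chain into a single std::visit over the variant, using the classic C++17 "overloaded" helper. A self-contained sketch of how that helper dispatches (the variant and lambda here are illustrative, not from the patch):

    #include <iostream>
    #include <string>
    #include <variant>

    template <class... Ts>
    struct overloaded : Ts... {
        using Ts::operator()...;
    };
    template <class... Ts>
    overloaded(Ts...) -> overloaded<Ts...>;  // deduction guide, needed before C++20

    int main() {
        std::variant<int, std::string> v = std::string("lazy");
        // One generic lambda covers every alternative, exactly as in
        // get_hash() and eval() above.
        std::visit(overloaded{[](const auto& x) {
                       std::cout << x << '\n';
                   }},
                   v);
    }
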
@@ -173,82 +201,37 @@ ov::Tensor LazyTensorImpl::eval() const {
        Perhaps it should be done after model compilation and not handled here.
     */
 
-    // Process the initial tensor - either from Const or from Concat
-    if (!m_parent) {
-        if (m_transform.first == TransformType::THIS) {
-            return get_orig_tensor();
-        } else if (m_transform.first == TransformType::CONCAT) {
-            std::vector<ov::Tensor> to_concat;
-            for (const auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
-                // Sanity check
-                NPUW_ASSERT(!lt.has_transformations());
-                to_concat.push_back(lt.get_orig_tensor());
-            }
-            return ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transform.second).second);
-        } else if (m_transform.first == TransformType::UNPACK) {
-            const auto& unpack_meta = std::get<UnpackMeta>(m_transform.second);
-            const auto& cw = std::get<0>(unpack_meta);
-            const auto& cz = std::get<1>(unpack_meta);
-            const auto& cs = std::get<2>(unpack_meta);
-            const auto& shape = std::get<3>(unpack_meta);
-            const auto& type = std::get<4>(unpack_meta);
-
-            // Note: unpacking done in-place since the original tensor is empty at this point
-            NPUW_ASSERT(!cw.has_transformations());
-            NPUW_ASSERT(!cs.has_transformations());
-            // FIXME: Ugly check concat case as well since cz might be not set
-            if (cz.has_transformations()) {
-                NPUW_ASSERT(false);
-            }
-
-            const auto& gti = ov::get_tensor_impl;
-            const auto& tw = cw.get_orig_tensor();
-            const auto& tz = cz.get_orig_tensor();
-            const auto& ts = cs.get_orig_tensor();
-            ov::Tensor dst(type, shape);
-            if (tw && tz && ts) {
-                ov::npuw::util::unpack(gti(tw), gti(tz), gti(ts), gti(dst));
-            } else if (tw && ts) {
-                ov::npuw::util::unpack(gti(tw), gti(ts), gti(dst));
-            } else {
-                NPUW_ASSERT(false && "Unsupported combination");
-            }
-            return dst;
-        } else {
-            NPUW_ASSERT(false);
-        }
-    }
-
-    // Process transformation
-    switch (m_transform.first) {
-    case TransformType::PERMUTE:
-        return ov::npuw::util::permute(m_parent->eval(), std::get<std::vector<std::size_t>>(m_transform.second));
-    case TransformType::CONVERT:
-        return ov::npuw::util::to_f16(m_parent->eval());
-    default:
-        NPUW_ASSERT(false);
-    }
-
-    NPUW_ASSERT(false);
-    return ov::Tensor();
+    ov::Tensor result = std::visit(overloaded{[](const auto& op) {
+                                       return op.eval();
+                                   }},
+                                   m_transform);
+    NPUW_ASSERT(result);
+    return result;
 }
 
-ov::Tensor LazyTensorImpl::get_orig_tensor() const {
-    // Sanity check
-    NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<OrigData>(m_transform.second))) {
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(m_transform.second)));
-    }
-    return std::get<ov::Tensor>(std::get<OrigData>(m_transform.second));
+LazyTensor::LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Const{const_ptr})) {}
+LazyTensor::LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Concat{to_concat, axis})) {}
+LazyTensor::LazyTensor(const LazyTensor& cw,
+                       const LazyTensor& cz,
+                       const LazyTensor& cs,
+                       const ov::element::Type& type,
+                       const ov::Shape& shape)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Unpack{cw, cz, cs, type, shape})) {}
+
+LazyTensor LazyTensor::permute(const std::vector<std::size_t>& axes) {
+    LazyTensor new_lt;
+    new_lt.m_impl = std::make_shared<LazyTensorImpl>(op::Permute{*this, axes});
+    return new_lt;
 }
 
-bool LazyTensorImpl::has_transformations() const {
-    return m_transform.first != TransformType::THIS;
+LazyTensor LazyTensor::convert(const ov::element::Type& type) {
+    LazyTensor new_lt;
+    new_lt.m_impl = std::make_shared<LazyTensorImpl>(op::Convert{*this, type});
+    return new_lt;
 }
 
-LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
-    : m_impl(std::make_shared<LazyTensorImpl>(type, transform)) {}
-
 bool LazyTensor::operator==(const LazyTensor& other) const {
     return *m_impl.get() == *other.m_impl.get();
 }
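Note the shift in API style above: where the old code mutated a LazyTensor in place via update(), permute() and convert() now return a fresh LazyTensor whose impl wraps *this, so transformation chains are built functionally. A hypothetical usage sketch (the constant's contents are made up for illustration; the identifiers follow the patch):

    // Build a chain Const -> Permute -> Convert and evaluate it on demand.
    auto node = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{2, 2},
                                             std::vector<float>{1.f, 2.f, 3.f, 4.f});
    LazyTensor lt(node);                                // wraps op::Const
    LazyTensor chained = lt.permute({1, 0})             // wraps op::Permute
                           .convert(ov::element::f16);  // wraps op::Convert
    ov::Tensor result = chained.eval();                 // evaluates the whole chain
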
@@ -257,37 +240,20 @@ bool LazyTensor::operator!=(const LazyTensor& other) const {
     return !(*m_impl.get() == *other.m_impl.get());
 }
 
-void LazyTensor::update(const TransformType& type, const Transform& transform) {
-    const auto& curr = m_impl;
-    auto new_lt = std::make_shared<LazyTensorImpl>();
-
-    new_lt->m_orig_data = curr->m_orig_data;
-    new_lt->m_orig_shape = curr->m_orig_shape;
-    new_lt->m_orig_type = curr->m_orig_type;
-
-    new_lt->m_transform = std::make_pair(type, transform);
-    new_lt->m_parent = curr;
-    new_lt->m_hash = new_lt->get_hash();
-
-    m_impl = new_lt;
-}
-
 ov::Tensor LazyTensor::eval() const {
+    if (!m_impl) {
+        return ov::Tensor();
+    }
     return m_impl->eval();
 }
 
-ov::Tensor LazyTensor::get_orig_tensor() const {
-    return m_impl->get_orig_tensor();
-}
-
 std::size_t LazyTensor::get_hash() const {
+    if (!m_impl) {
+        return 0;
+    }
     return m_impl->get_hash();
 }
 
 std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
     return lt.get_hash();
 }
-
-bool LazyTensor::has_transformations() const {
-    return m_impl->has_transformations();
-}
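With equality reduced to a cached-hash check plus variant comparison, and Hash exposing that cached value, a LazyTensor can serve directly as a hash-map key — which is what lets identical transformation chains be deduplicated and evaluated once. A sketch of such keying (the container and variable names are illustrative, not from the patch):

    #include <unordered_map>

    // Evaluate each unique chain once; closures that share the same
    // transformation history hash to the same slot.
    std::unordered_map<LazyTensor, ov::Tensor, LazyTensor::Hash> evaluated;
    ov::Tensor& slot = evaluated[chained];
    if (!slot) {
        slot = chained.eval();
    }
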
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
index 5cdeeba058e45f..365d9d636872b8 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -5,33 +5,17 @@
 #pragma once
 
 #include <memory>
-#include <optional>
-#include <tuple>
-#include <utility>
-#include <variant>
-
-#include "logging.hpp"
-#include "openvino/runtime/make_tensor.hpp"
+
+#include "openvino/op/constant.hpp"
 #include "openvino/runtime/tensor.hpp"
-#include "util.hpp"
 
 namespace ov {
 namespace npuw {
 namespace weights {
-
-enum class TransformType : int { THIS, PERMUTE, CONVERT, CONCAT, UNPACK };
-
 // Forward declaration
 class LazyTensor;
 struct LazyTensorImpl;
 
-using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
-using UnpackMeta = std::tuple<LazyTensor, LazyTensor, LazyTensor, ov::Shape, ov::element::Type>;
-using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
-using OrigData = std::variant<ConstPtr, ov::Tensor>;
-
-using Transform = std::variant<OrigData, std::vector<std::size_t>, std::monostate, ConcatMeta, UnpackMeta>;
-
 class LazyTensor {
 public:
     class Hash {
@@ -40,17 +24,23 @@ class LazyTensor {
     };
 
     LazyTensor() = default;
-    LazyTensor(const TransformType& type, const Transform& transform);
+    LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr);
+    LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis);  // construct from concat
+    LazyTensor(const LazyTensor& cw,
+               const LazyTensor& cz,
+               const LazyTensor& cs,
+               const ov::element::Type& type,
+               const ov::Shape& shape);  // construct from unpack
+
+    LazyTensor permute(const std::vector<std::size_t>& axes);
+    LazyTensor convert(const ov::element::Type& type);
 
     bool operator==(const LazyTensor& other) const;
     bool operator!=(const LazyTensor& other) const;
 
-    void update(const TransformType& type, const Transform& transform);
-
     ov::Tensor eval() const;
-    ov::Tensor get_orig_tensor() const;
 
     std::size_t get_hash() const;
 
-    bool has_transformations() const;
-
 private:
     std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
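The new header makes the construction surface explicit: three entry points (a Constant, a concat of LazyTensors, an unpack triple) plus two chainable transformations. A hypothetical sketch of the three constructors (all operand names are illustrative):

    // Assume weight_const is a std::shared_ptr<ov::op::v0::Constant>,
    // and lt0/lt1/zero_pt/scale are existing LazyTensors.
    LazyTensor weight(weight_const);             // from a single Constant
    LazyTensor stacked({lt0, lt1}, /*axis=*/0);  // from a concat of LazyTensors
    LazyTensor unpacked(weight, zero_pt, scale,  // from unpack: weights, zero point, scale,
                        ov::element::f16,        // plus the destination type...
                        ov::Shape{4096, 4096});  // ...and shape
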
std::static_pointer_cast(input_node))); // (n)/1/i/c + LazyTensor(std::static_pointer_cast(input_node))); // (n)/1/i/c } else if (ov::op::util::is_parameter(input_node)) { LOG_DEBUG("Handling a Parameter input " << prod_output); LOG_BLOCK(); @@ -1695,8 +1694,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) { LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx << "] (via prototype " << proto_layer_name << ")"); funcall._lazy_closure[param_idx - function._param_offset] = - LazyTensor(TransformType::THIS, - std::static_pointer_cast(input_node)); // (t)/1/c + LazyTensor(std::static_pointer_cast(input_node)); // (t)/1/c } } // for (inputs) } // for(nodes) @@ -1765,7 +1763,7 @@ void Partitioner::optimize(const std::string& func_name) { auto closure_idx = param_idx - f._param_offset; ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - funcall._lazy_closure[closure_idx].update(TransformType::PERMUTE, p.second); + funcall._lazy_closure[closure_idx] = funcall._lazy_closure[closure_idx].permute(p.second); }); } }; @@ -1775,7 +1773,7 @@ void Partitioner::optimize(const std::string& func_name) { auto closure_idx = param_idx - f._param_offset; ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - funcall._lazy_closure[closure_idx].update(TransformType::CONVERT, std::monostate{}); + funcall._lazy_closure[closure_idx] = funcall._lazy_closure[closure_idx].convert(ov::element::f16); }); } }; @@ -1830,15 +1828,12 @@ void Partitioner::optimize(const std::string& func_name) { std::vector to_concat; // Fill tensor vector for (auto&& cidx : to_concat_idx) { - // FIXME: Assuming here concat goes first and other transformations later. - // This allows to store ov::Tensor and ignore their potential history of transformations - NPUW_ASSERT(!funcall._lazy_closure[cidx].has_transformations()); to_concat.push_back(funcall._lazy_closure[cidx]); } // Note: we can ignore updating funcall._lazy_closure[cidx] here since those LazyTensors will be gone // and the new one added into the vector if (!to_concat.empty()) { - funcall._lazy_closure.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis))); + funcall._lazy_closure.push_back(LazyTensor(to_concat, axis)); // Some of the tensors might be in closure - preserve it's 1:1 idx mapping with _lazy_closure funcall._closure.push_back(ov::Tensor()); } @@ -1865,17 +1860,11 @@ void Partitioner::optimize(const std::string& func_name) { ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original - // ov::Tensor below LazyTensor cw = funcall._lazy_closure[w_idx - f._param_offset]; - LazyTensor cz = z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset] - : LazyTensor(TransformType::THIS, ov::Tensor()); + LazyTensor cz = z_idx != -1 ? 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
index 60f705a0c8f26c..641ee7690f4d34 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
@@ -94,8 +94,14 @@ ClosureRemap build_remap(const Function& fbody, const DCOFFParams& params_to) {
         } else if (ban_list.find(param) == ban_list.end()) {
             // If it's not in the ban list, it's an OK parameter and should be kept
             LOG_DEBUG("This is an OK parameter, will be kept");
-            m.weights_to_unpack.insert(i - fbody._param_offset);
             m.closure_remap.push_back(i - fbody._param_offset);
+
+            // Check if unpack is indeed required
+            const auto& type = param->get_element_type();
+            if (type == ov::element::i4 || type == ov::element::u4 || type == ov::element::i8 ||
+                type == ov::element::u8) {
+                m.weights_to_unpack.insert(i - fbody._param_offset);
+            }
         }
 
         // Process zero points for parameters
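The dcoff.cpp change above narrows weights_to_unpack to parameters that actually need decompression. A standalone sketch of the predicate it introduces (the helper name is hypothetical, not from the patch):

    #include "openvino/core/type/element_type.hpp"

    // Only low-precision (quantized) weights go through the unpack path;
    // full-precision parameters are remapped as-is.
    static bool needs_unpack(const ov::element::Type& type) {
        return type == ov::element::i4 || type == ov::element::u4 ||
               type == ov::element::i8 || type == ov::element::u8;
    }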