From d33ada4bd9e18cd01f56dce372d6d294c166c137 Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Fri, 29 Nov 2024 12:26:27 +0000
Subject: [PATCH] [NPUW] LazyTensor refactoring (#27798)

Mirror of https://github.com/openvinotoolkit/openvino/pull/27108
---
 .../intel_npu/src/plugin/npuw/lazy_tensor.cpp | 398 ++++++++----------
 .../intel_npu/src/plugin/npuw/lazy_tensor.hpp |  32 +-
 .../plugin/npuw/partitioning/partitioning.cpp |  27 +-
 .../npuw/partitioning/patterns/dcoff.cpp      |   8 +-
 4 files changed, 208 insertions(+), 257 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
index 8a0317a9f714e8..81521222ae6fae 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -4,41 +4,166 @@
 
 #include "lazy_tensor.hpp"
 
-using ov::npuw::weights::ConcatMeta;
-using ov::npuw::weights::ConstPtr;
+#include <memory>
+#include <tuple>
+#include <variant>
+
+#include "logging.hpp"
+#include "openvino/runtime/make_tensor.hpp"
+#include "util.hpp"
+
 using ov::npuw::weights::LazyTensor;
-using ov::npuw::weights::OrigData;
-using ov::npuw::weights::Transform;
-using ov::npuw::weights::TransformType;
-using ov::npuw::weights::UnpackMeta;
 
 namespace ov {
 namespace npuw {
 namespace weights {
+namespace op {
+struct Const {
+    std::shared_ptr<ov::op::v0::Constant> node;
+
+    std::size_t hash() const {
+        std::size_t seed = std::hash<const void*>()(node->get_data_ptr()) + 0x9e3779b9;
+        seed ^= node->get_element_type().hash() + 0x9e3779b9;
+        for (const auto& dim : node->get_shape()) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Const& other) const {
+        return (node->get_shape() == other.node->get_shape() &&
+                node->get_element_type() == other.node->get_element_type() &&
+                node->get_data_ptr() == other.node->get_data_ptr());
+    }
+    ov::Tensor eval() const {
+        return ov::npuw::util::tensor_from_const(node);
+    }
+};
+struct Concat {
+    std::vector<LazyTensor> tensors;
+    std::size_t axis;
+
+    std::size_t hash() const {
+        std::size_t seed = std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        for (auto& lt : tensors) {
+            seed ^= lt.get_hash() + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Concat& other) const {
+        return (axis == other.axis && tensors == other.tensors);
+    }
+    ov::Tensor eval() const {
+        std::vector<ov::Tensor> to_concat;
+        for (const auto& lt : tensors) {
+            to_concat.push_back(lt.eval());
+        }
+        return ov::npuw::util::concat(to_concat, axis);
+    }
+};
+
+struct Unpack {
+    LazyTensor w, z, s;
+    ov::element::Type type;
+    ov::Shape shape;
+
+    std::size_t hash() const {
+        std::size_t seed = w.get_hash() + 0x9e3779b9;
+        seed ^= z.get_hash() + 0x9e3779b9;
+        seed ^= s.get_hash() + 0x9e3779b9;
+        seed ^= type.hash() + 0x9e3779b9;
+        for (const auto& dim : shape) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Unpack& other) const {
+        return (type == other.type && shape == other.shape && w == other.w && z == other.z && s == other.s);
+    }
+    ov::Tensor eval() const {
+        const auto& gti = ov::get_tensor_impl;
+        const auto& tw = w.eval();
+        const auto& tz = z.eval();
+        const auto& ts = s.eval();
+        NPUW_ASSERT(tw);
+        ov::Tensor dst(type, shape);
+        if (tw && tz && ts) {
+            ov::npuw::util::unpack(gti(tw), gti(tz), gti(ts), gti(dst));
+        } else if (tw && ts) {
+            ov::npuw::util::unpack(gti(tw), gti(ts), gti(dst));
+        } else {
+            NPUW_ASSERT(false && "Unsupported combination");
+        }
+        return dst;
+    }
+};
+struct Permute {
+    LazyTensor tensor;
+    std::vector<std::size_t> axes;
+
+    std::size_t hash() const {
+        std::size_t seed = tensor.get_hash() + 0x9e3779b9;
+        for (const auto& axis : axes) {
+            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
+        }
+        return seed;
+    }
+    bool operator==(const Permute& other) const {
+        return (axes == other.axes && tensor == other.tensor);
+    }
+    ov::Tensor eval() const {
+        return ov::npuw::util::permute(tensor.eval(), axes);
+    }
+};
+struct Convert {
+    LazyTensor tensor;
+    ov::element::Type type;
+
+    std::size_t hash() const {
+        std::size_t seed = type.hash() + 0x9e3779b9;
+        seed ^= tensor.get_hash() + 0x9e3779b9;
+        return seed;
+    }
+    bool operator==(const Convert& other) const {
+        return (type == other.type && tensor == other.tensor);
+    }
+    ov::Tensor eval() const {
+        NPUW_ASSERT(ov::element::f16 == type);
+        return ov::npuw::util::to_f16(tensor.eval());
+    }
+};
+}  // namespace op
+
+using Transform = std::variant<op::Const, op::Concat, op::Unpack, op::Permute, op::Convert>;
 
 struct LazyTensorImpl {
 public:
     LazyTensorImpl() = default;
-    LazyTensorImpl(const TransformType& type, const Transform& transform);
-
-    bool operator==(const LazyTensorImpl& other) const;
+    explicit LazyTensorImpl(Transform&& t);
 
     ov::Tensor eval() const;
-    ov::Tensor get_orig_tensor() const;
-
+    bool operator==(const LazyTensorImpl& other) const;
     std::size_t get_hash() const;
 
-    bool has_transformations() const;
-
-    std::shared_ptr<LazyTensorImpl> m_parent = nullptr;
-    std::pair<TransformType, Transform> m_transform;
+    Transform m_transform;
     std::size_t m_hash = 0;
+};
+
+}  // namespace weights
+}  // namespace npuw
+}  // namespace ov
+
+using namespace ov::npuw::weights::op;
+using ov::npuw::weights::LazyTensorImpl;
+using ov::npuw::weights::Transform;
 
-    void* m_orig_data = nullptr;
-    ov::Shape m_orig_shape;
-    ov::element::Type m_orig_type;
+// std::visit helper
+template <class... Ts>
+struct overloaded : Ts... {
+    using Ts::operator()...;
 };
+template <class... Ts>
+overloaded(Ts...) -> overloaded<Ts...>;
 
 std::size_t LazyTensorImpl::get_hash() const {
     // Already calculated
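Each op:: struct in the hunk above implements the same informal interface — hash(), operator==, and eval() — which is what lets LazyTensorImpl treat all five alternatives uniformly through std::variant. The hash() members all mix fields into a running seed with the golden-ratio constant 0x9e3779b9, the same idiom as boost::hash_combine. A minimal standalone sketch of that idiom (illustrative helper names, not part of the patch):

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Mix one more value into the running seed; 0x9e3779b9 is the 32-bit
    // golden-ratio constant used to spread the bits, as in boost::hash_combine.
    std::size_t combine(std::size_t seed, std::size_t value) {
        return seed ^ (std::hash<std::size_t>()(value) + 0x9e3779b9);
    }

    std::size_t hash_dims(const std::vector<std::size_t>& dims) {
        std::size_t seed = 0;
        for (const auto& d : dims) {
            seed = combine(seed, d);
        }
        return seed;
    }
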
@@ -46,120 +171,23 @@ std::size_t LazyTensorImpl::get_hash() const {
         return m_hash;
     }
 
-    // Get parent's hash
+    // Get hash
     std::size_t seed = 0;
-    if (m_parent) {
-        seed = m_parent->get_hash();
-    } else {
-        seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
-        for (const auto& dim : m_orig_shape) {
-            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
-        }
-        seed ^= m_orig_type.hash() + 0x9e3779b9;
-    }
-
-    // Combine with this hash
-    seed ^= std::hash<int>()(static_cast<int>(m_transform.first)) + 0x9e3779b9;
-    if (m_transform.first == TransformType::PERMUTE) {
-        const auto& axes = std::get<std::vector<std::size_t>>(m_transform.second);
-        for (const auto& axis : axes) {
-            seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-        }
-    } else if (m_transform.first == TransformType::CONCAT) {
-        const auto& axis = std::get<ConcatMeta>(m_transform.second).second;
-        seed ^= std::hash<std::size_t>()(axis) + 0x9e3779b9;
-        for (auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
-            seed ^= lt.get_hash() + 0x9e3779b9;
-        }
-    } else if (m_transform.first == TransformType::UNPACK) {
-        const auto& unpack_meta = std::get<UnpackMeta>(m_transform.second);
-        seed ^= std::get<0>(unpack_meta).get_hash() + 0x9e3779b9;
-        seed ^= std::get<1>(unpack_meta).get_hash() + 0x9e3779b9;
-        seed ^= std::get<2>(unpack_meta).get_hash() + 0x9e3779b9;
-        for (const auto& dim : std::get<3>(unpack_meta)) {
-            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
-        }
-        seed ^= std::get<4>(unpack_meta).hash() + 0x9e3779b9;
-    }
+    std::visit(overloaded{[&seed](const auto& op) {
+                   seed ^= op.hash();
+               }},
+               m_transform);
 
     return seed;
 }
 
-}  // namespace weights
-}  // namespace npuw
-}  // namespace ov
-
-using ov::npuw::weights::LazyTensorImpl;
-
-LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
-    if (type == TransformType::THIS && std::holds_alternative<OrigData>(transform)) {
-        m_transform = std::make_pair(type, transform);
-        ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<OrigData>(transform))) {
-            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(transform)));
-        } else {
-            tensor = std::get<ov::Tensor>(std::get<OrigData>(transform));
-            if (!tensor) {
-                // Don't set anything
-                return;
-            }
-        }
-        m_orig_data = tensor.data();
-        m_orig_shape = tensor.get_shape();
-        m_orig_type = tensor.get_element_type();
-    } else if (type == TransformType::CONCAT && std::holds_alternative<ConcatMeta>(transform)) {
-        m_transform = std::make_pair(type, transform);
-    } else if (type == TransformType::UNPACK && std::holds_alternative<UnpackMeta>(transform)) {
-        m_transform = std::make_pair(type, transform);
-    } else {
-        NPUW_ASSERT(false);
-    }
+LazyTensorImpl::LazyTensorImpl(Transform&& t) {
+    m_transform = std::move(t);
     m_hash = get_hash();
 }
 
 bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
-    if (m_hash != other.m_hash || m_orig_data != other.m_orig_data || m_orig_shape != other.m_orig_shape ||
-        m_orig_type != other.m_orig_type || m_transform.first != other.m_transform.first) {
-        return false;
-    }
-
-    switch (m_transform.first) {
-    case TransformType::THIS:
-        // everything is already compared above - skip
-        break;
-    case TransformType::CONVERT:
-        // everything is already compared above - skip
-        break;
-    case TransformType::PERMUTE:
-        if (std::get<std::vector<std::size_t>>(m_transform.second) !=
-            std::get<std::vector<std::size_t>>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    case TransformType::CONCAT:
-        if (std::get<ConcatMeta>(m_transform.second) != std::get<ConcatMeta>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    case TransformType::UNPACK:
-        if (std::get<UnpackMeta>(m_transform.second) != std::get<UnpackMeta>(other.m_transform.second)) {
-            return false;
-        }
-        break;
-    default:
-        NPUW_ASSERT(false);
-        break;
-    }
-
-    if ((m_parent && !other.m_parent) || (!m_parent && other.m_parent)) {
-        return false;
-    }
-
-    if (m_parent && other.m_parent) {
-        return *m_parent.get() == *other.m_parent.get();
-    }
-
-    return true;
+    return m_hash == other.m_hash && m_transform == other.m_transform;
 }
 
 ov::Tensor LazyTensorImpl::eval() const {
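The hunk above collapses the old per-TransformType if/else chain into a single std::visit over the variant, using the classic C++17 "overloaded" helper. A self-contained sketch of how that helper dispatches (the variant and lambda here are illustrative, not from the patch):

    #include <iostream>
    #include <string>
    #include <variant>

    template <class... Ts>
    struct overloaded : Ts... {
        using Ts::operator()...;
    };
    template <class... Ts>
    overloaded(Ts...) -> overloaded<Ts...>;  // deduction guide, needed before C++20

    int main() {
        std::variant<int, std::string> v = std::string("lazy");
        // One generic lambda covers every alternative, exactly as in
        // get_hash() and eval() above.
        std::visit(overloaded{[](const auto& x) {
                       std::cout << x << '\n';
                   }},
                   v);
    }
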
@@ -173,82 +201,37 @@ ov::Tensor LazyTensorImpl::eval() const {
        Perhaps it should be done after model compilation and not handled here.
     */
 
-    // Process the initial tensor - either from Const or from Concat
-    if (!m_parent) {
-        if (m_transform.first == TransformType::THIS) {
-            return get_orig_tensor();
-        } else if (m_transform.first == TransformType::CONCAT) {
-            std::vector<ov::Tensor> to_concat;
-            for (const auto& lt : std::get<ConcatMeta>(m_transform.second).first) {
-                // Sanity check
-                NPUW_ASSERT(!lt.has_transformations());
-                to_concat.push_back(lt.get_orig_tensor());
-            }
-            return ov::npuw::util::concat(to_concat, std::get<ConcatMeta>(m_transform.second).second);
-        } else if (m_transform.first == TransformType::UNPACK) {
-            const auto& unpack_meta = std::get<UnpackMeta>(m_transform.second);
-            const auto& cw = std::get<0>(unpack_meta);
-            const auto& cz = std::get<1>(unpack_meta);
-            const auto& cs = std::get<2>(unpack_meta);
-            const auto& shape = std::get<3>(unpack_meta);
-            const auto& type = std::get<4>(unpack_meta);
-
-            // Note: unpacking done in-place since the original tensor is empty at this point
-            NPUW_ASSERT(!cw.has_transformations());
-            NPUW_ASSERT(!cs.has_transformations());
-            // FIXME: Ugly check concat case as well since cz might be not set
-            if (cz.has_transformations()) {
-                NPUW_ASSERT(false);
-            }
-
-            const auto& gti = ov::get_tensor_impl;
-            const auto& tw = cw.get_orig_tensor();
-            const auto& tz = cz.get_orig_tensor();
-            const auto& ts = cs.get_orig_tensor();
-            ov::Tensor dst(type, shape);
-            if (tw && tz && ts) {
-                ov::npuw::util::unpack(gti(tw), gti(tz), gti(ts), gti(dst));
-            } else if (tw && ts) {
-                ov::npuw::util::unpack(gti(tw), gti(ts), gti(dst));
-            } else {
-                NPUW_ASSERT(false && "Unsupported combination");
-            }
-            return dst;
-        } else {
-            NPUW_ASSERT(false);
-        }
-    }
-
-    // Process transformation
-    switch (m_transform.first) {
-    case TransformType::PERMUTE:
-        return ov::npuw::util::permute(m_parent->eval(), std::get<std::vector<std::size_t>>(m_transform.second));
-    case TransformType::CONVERT:
-        return ov::npuw::util::to_f16(m_parent->eval());
-    default:
-        NPUW_ASSERT(false);
-    }
-
-    NPUW_ASSERT(false);
-    return ov::Tensor();
+    ov::Tensor result = std::visit(overloaded{[](const auto& op) {
+                                       return op.eval();
+                                   }},
+                                   m_transform);
+    NPUW_ASSERT(result);
+    return result;
 }
 
-ov::Tensor LazyTensorImpl::get_orig_tensor() const {
-    // Sanity check
-    NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<OrigData>(m_transform.second))) {
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(m_transform.second)));
-    }
-    return std::get<ov::Tensor>(std::get<OrigData>(m_transform.second));
+LazyTensor::LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Const{const_ptr})) {}
+LazyTensor::LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Concat{to_concat, axis})) {}
+LazyTensor::LazyTensor(const LazyTensor& cw,
+                       const LazyTensor& cz,
+                       const LazyTensor& cs,
+                       const ov::element::Type& type,
+                       const ov::Shape& shape)
+    : m_impl(std::make_shared<LazyTensorImpl>(op::Unpack{cw, cz, cs, type, shape})) {}
+
+LazyTensor LazyTensor::permute(const std::vector<std::size_t>& axes) {
+    LazyTensor new_lt;
+    new_lt.m_impl = std::make_shared<LazyTensorImpl>(op::Permute{*this, axes});
+    return new_lt;
 }
 
-bool LazyTensorImpl::has_transformations() const {
-    return m_transform.first != TransformType::THIS;
+LazyTensor LazyTensor::convert(const ov::element::Type& type) {
+    LazyTensor new_lt;
+    new_lt.m_impl = std::make_shared<LazyTensorImpl>(op::Convert{*this, type});
+    return new_lt;
 }
 
-LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
-    : m_impl(std::make_shared<LazyTensorImpl>(type, transform)) {}
-
 bool LazyTensor::operator==(const LazyTensor& other) const {
     return *m_impl.get() == *other.m_impl.get();
 }
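Note the shift in API style above: where the old code mutated a LazyTensor in place via update(), permute() and convert() now return a fresh LazyTensor whose impl wraps *this, so transformation chains are built functionally. A hypothetical usage sketch (the constant's contents are made up for illustration; the identifiers follow the patch):

    // Build a chain Const -> Permute -> Convert and evaluate it on demand.
    auto node = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{2, 2},
                                             std::vector<float>{1.f, 2.f, 3.f, 4.f});
    LazyTensor lt(node);                                // wraps op::Const
    LazyTensor chained = lt.permute({1, 0})             // wraps op::Permute
                           .convert(ov::element::f16);  // wraps op::Convert
    ov::Tensor result = chained.eval();                 // evaluates the whole chain
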
@@ -257,37 +240,20 @@ bool LazyTensor::operator!=(const LazyTensor& other) const {
     return !(*m_impl.get() == *other.m_impl.get());
 }
 
-void LazyTensor::update(const TransformType& type, const Transform& transform) {
-    const auto& curr = m_impl;
-    auto new_lt = std::make_shared<LazyTensorImpl>();
-
-    new_lt->m_orig_data = curr->m_orig_data;
-    new_lt->m_orig_shape = curr->m_orig_shape;
-    new_lt->m_orig_type = curr->m_orig_type;
-
-    new_lt->m_transform = std::make_pair(type, transform);
-    new_lt->m_parent = curr;
-    new_lt->m_hash = new_lt->get_hash();
-
-    m_impl = new_lt;
-}
-
 ov::Tensor LazyTensor::eval() const {
+    if (!m_impl) {
+        return ov::Tensor();
+    }
     return m_impl->eval();
 }
 
-ov::Tensor LazyTensor::get_orig_tensor() const {
-    return m_impl->get_orig_tensor();
-}
-
 std::size_t LazyTensor::get_hash() const {
+    if (!m_impl) {
+        return 0;
+    }
     return m_impl->get_hash();
 }
 
 std::size_t LazyTensor::Hash::operator()(const LazyTensor& lt) const {
     return lt.get_hash();
 }
-
-bool LazyTensor::has_transformations() const {
-    return m_impl->has_transformations();
-}
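With equality reduced to a cached-hash check plus variant comparison, and Hash exposing that cached value, a LazyTensor can serve directly as a hash-map key — which is what lets identical transformation chains be deduplicated and evaluated once. A sketch of such keying (the container and variable names are illustrative, not from the patch):

    #include <unordered_map>

    // Evaluate each unique chain once; closures that share the same
    // transformation history hash to the same slot.
    std::unordered_map<LazyTensor, ov::Tensor, LazyTensor::Hash> evaluated;
    ov::Tensor& slot = evaluated[chained];
    if (!slot) {
        slot = chained.eval();
    }
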
diff --git a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
index 5cdeeba058e45f..365d9d636872b8 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -5,33 +5,17 @@
 #pragma once
 
 #include <memory>
-#include <optional>
-#include <tuple>
-#include <utility>
-#include <variant>
-
-#include "logging.hpp"
-#include "openvino/runtime/make_tensor.hpp"
+
+#include "openvino/op/constant.hpp"
 #include "openvino/runtime/tensor.hpp"
-#include "util.hpp"
 
 namespace ov {
 namespace npuw {
 namespace weights {
-
-enum class TransformType : int { THIS, PERMUTE, CONVERT, CONCAT, UNPACK };
-
 // Forward declaration
 class LazyTensor;
 struct LazyTensorImpl;
 
-using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
-using UnpackMeta = std::tuple<LazyTensor, LazyTensor, LazyTensor, ov::Shape, ov::element::Type>;
-using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
-using OrigData = std::variant<ConstPtr, ov::Tensor>;
-
-using Transform = std::variant<OrigData, std::vector<std::size_t>, std::monostate, ConcatMeta, UnpackMeta>;
-
 class LazyTensor {
 public:
     class Hash {
@@ -40,17 +24,23 @@ class LazyTensor {
     };
 
     LazyTensor() = default;
-    LazyTensor(const TransformType& type, const Transform& transform);
+    LazyTensor(const std::shared_ptr<ov::op::v0::Constant>& const_ptr);
+    LazyTensor(const std::vector<LazyTensor>& to_concat, const std::size_t axis);  // construct from concat
+    LazyTensor(const LazyTensor& cw,
+               const LazyTensor& cz,
+               const LazyTensor& cs,
+               const ov::element::Type& type,
+               const ov::Shape& shape);  // construct from unpack
+
+    LazyTensor permute(const std::vector<std::size_t>& axes);
+    LazyTensor convert(const ov::element::Type& type);
 
     bool operator==(const LazyTensor& other) const;
     bool operator!=(const LazyTensor& other) const;
 
-    void update(const TransformType& type, const Transform& transform);
-
     ov::Tensor eval() const;
-    ov::Tensor get_orig_tensor() const;
 
     std::size_t get_hash() const;
 
-    bool has_transformations() const;
-
 private:
     std::shared_ptr<LazyTensorImpl> m_impl = nullptr;
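The new header makes the construction surface explicit: three entry points (a Constant, a concat of LazyTensors, an unpack triple) plus two chainable transformations. A hypothetical sketch of the three constructors (all operand names are illustrative):

    // Assume weight_const is a std::shared_ptr<ov::op::v0::Constant>,
    // and lt0/lt1/zero_pt/scale are existing LazyTensors.
    LazyTensor weight(weight_const);             // from a single Constant
    LazyTensor stacked({lt0, lt1}, /*axis=*/0);  // from a concat of LazyTensors
    LazyTensor unpacked(weight, zero_pt, scale,  // from unpack: weights, zero point, scale,
                        ov::element::f16,        // plus the destination type...
                        ov::Shape{4096, 4096});  // ...and shape
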
std::static_pointer_cast(input_node))); // (n)/1/i/c + LazyTensor(std::static_pointer_cast(input_node))); // (n)/1/i/c } else if (ov::op::util::is_parameter(input_node)) { LOG_DEBUG("Handling a Parameter input " << prod_output); LOG_BLOCK(); @@ -1695,8 +1694,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) { LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx << "] (via prototype " << proto_layer_name << ")"); funcall._lazy_closure[param_idx - function._param_offset] = - LazyTensor(TransformType::THIS, - std::static_pointer_cast(input_node)); // (t)/1/c + LazyTensor(std::static_pointer_cast(input_node)); // (t)/1/c } } // for (inputs) } // for(nodes) @@ -1765,7 +1763,7 @@ void Partitioner::optimize(const std::string& func_name) { auto closure_idx = param_idx - f._param_offset; ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - funcall._lazy_closure[closure_idx].update(TransformType::PERMUTE, p.second); + funcall._lazy_closure[closure_idx] = funcall._lazy_closure[closure_idx].permute(p.second); }); } }; @@ -1775,7 +1773,7 @@ void Partitioner::optimize(const std::string& func_name) { auto closure_idx = param_idx - f._param_offset; ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - funcall._lazy_closure[closure_idx].update(TransformType::CONVERT, std::monostate{}); + funcall._lazy_closure[closure_idx] = funcall._lazy_closure[closure_idx].convert(ov::element::f16); }); } }; @@ -1830,15 +1828,12 @@ void Partitioner::optimize(const std::string& func_name) { std::vector to_concat; // Fill tensor vector for (auto&& cidx : to_concat_idx) { - // FIXME: Assuming here concat goes first and other transformations later. - // This allows to store ov::Tensor and ignore their potential history of transformations - NPUW_ASSERT(!funcall._lazy_closure[cidx].has_transformations()); to_concat.push_back(funcall._lazy_closure[cidx]); } // Note: we can ignore updating funcall._lazy_closure[cidx] here since those LazyTensors will be gone // and the new one added into the vector if (!to_concat.empty()) { - funcall._lazy_closure.push_back(LazyTensor(TransformType::CONCAT, std::make_pair(to_concat, axis))); + funcall._lazy_closure.push_back(LazyTensor(to_concat, axis)); // Some of the tensors might be in closure - preserve it's 1:1 idx mapping with _lazy_closure funcall._closure.push_back(ov::Tensor()); } @@ -1865,17 +1860,11 @@ void Partitioner::optimize(const std::string& func_name) { ov::parallel_for(func_group.refs.size(), [&](std::size_t f_idx) { auto& funcall = func_group.refs[f_idx].get(); - // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original - // ov::Tensor below LazyTensor cw = funcall._lazy_closure[w_idx - f._param_offset]; - LazyTensor cz = z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset] - : LazyTensor(TransformType::THIS, ov::Tensor()); + LazyTensor cz = z_idx != -1 ? 
diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
index 60f705a0c8f26c..641ee7690f4d34 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/dcoff.cpp
@@ -94,8 +94,14 @@ ClosureRemap build_remap(const Function& fbody, const DCOFFParams& params_to) {
         } else if (ban_list.find(param) == ban_list.end()) {
             // If it's not in the ban list, it's an OK parameter and should be kept
             LOG_DEBUG("This is an OK parameter, will be kept");
-            m.weights_to_unpack.insert(i - fbody._param_offset);
             m.closure_remap.push_back(i - fbody._param_offset);
+
+            // Check if unpack is indeed required
+            const auto& type = param->get_element_type();
+            if (type == ov::element::i4 || type == ov::element::u4 || type == ov::element::i8 ||
+                type == ov::element::u8) {
+                m.weights_to_unpack.insert(i - fbody._param_offset);
+            }
         }
 
         // Process zero points for parameters
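The dcoff.cpp change above narrows weights_to_unpack to parameters that actually need decompression. A standalone sketch of the predicate it introduces (the helper name is hypothetical, not from the patch):

    #include "openvino/core/type/element_type.hpp"

    // Only low-precision (quantized) weights go through the unpack path;
    // full-precision parameters are remapped as-is.
    static bool needs_unpack(const ov::element::Type& type) {
        return type == ov::element::i4 || type == ov::element::u4 ||
               type == ov::element::i8 || type == ov::element::u8;
    }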