openvinotoolkit · AsyaPronina · Oct 31, 2024 · Oct 16, 2024 · Nov 12, 2024 · dmatveev
@@ -59,6 +59,7 @@ DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime);
 DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime);
 DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime);
 DEFINE_OPT(NPUW_ACC_DEVICE, std::string, "", npuw::accuracy::reference_device, RunTime);
+DEFINE_OPT(NPUW_ACC_DUMP_FAILS, bool, false, npuw::accuracy::dump_failures, RunTime);
 DEFINE_OPT(NPUW_DUMP_FULL, bool, false, npuw::dump::full, CompileTime);
 DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime);
 DEFINE_OPT(NPUW_DUMP_SUBS_ON_FAIL, std::string, "", npuw::dump::subgraphs_on_fail, CompileTime);

@@ -307,6 +307,14 @@ static constexpr ov::Property<double> threshold{"NPUW_ACC_THRESH"};
  * Default value: empty.
  */
 static constexpr ov::Property<std::string> reference_device{"NPUW_ACC_DEVICE"};
+
+/**
+ * @brief
+ * Type: bool.
+ * Enable dumps of materials for model(s), failing accuracy check.
+ * Default value: false.
+ */
+static constexpr ov::Property<std::string> dump_failures{"NPUW_ACC_DUMP_FAILS"};
 }  // namespace accuracy
 
 namespace dump {

@@ -44,6 +44,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
     desc.add<NPUW_ACC_CHECK>();
     desc.add<NPUW_ACC_THRESH>();
     desc.add<NPUW_ACC_DEVICE>();
+    desc.add<NPUW_ACC_DUMP_FAILS>();
 #ifdef NPU_PLUGIN_DEVELOPER_BUILD
     desc.add<NPUW_DUMP_FULL>();
     desc.add<NPUW_DUMP_SUBS>();

@@ -13,30 +13,47 @@
 ov::npuw::metrics::NRMSE::NRMSE(double threshold) : m_threshold(threshold) {}
 
 bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
-                                          const ov::SoPtr<ov::ITensor>& reference) const {
-    NPUW_ASSERT(actual->is_continuous());
-    NPUW_ASSERT(reference->is_continuous());
+                                          const ov::SoPtr<ov::ITensor>& reference,
+                                          double* result) const {
     NPUW_ASSERT(actual->get_shape() == reference->get_shape());
     // Check for alignment:
     NPUW_ASSERT(actual->get_byte_size() == reference->get_byte_size());
-    // FIXME: Check for strides
+
+    ov::Tensor in_actual(actual->get_element_type(), actual->get_shape());
+    ov::Tensor in_reference(reference->get_element_type(), reference->get_shape());
+
+    if (!actual->is_continuous()) {
+        ov::make_tensor(actual).copy_to(in_actual);
+    } else {
+        in_actual = ov::make_tensor(actual);
+    }
+    if (!reference->is_continuous()) {
+        ov::make_tensor(reference).copy_to(in_reference);
+    } else {
+        in_reference = ov::make_tensor(reference);
+    }
+
+    // TODO: it might be more correct to make to_f32 function
+    //       to work with strided tensors
+    NPUW_ASSERT(in_actual.is_continuous());
+    NPUW_ASSERT(in_reference.is_continuous());
 
     ov::Tensor actual_f32;
     ov::Tensor reference_f32;
 
-    if (ov::element::Type_t::f32 == actual->get_element_type()) {
-        actual_f32 = ov::make_tensor(actual);
+    if (ov::element::f32 == in_actual.get_element_type()) {
+        actual_f32 = in_actual;
     } else {
-        ov::Tensor dst(ov::element::Type_t::f32, actual->get_shape());
-        ov::npuw::util::to_f32(ov::make_tensor(actual), dst);
+        ov::Tensor dst(ov::element::Type_t::f32, in_actual.get_shape());
+        ov::npuw::util::to_f32(in_actual, dst);
         actual_f32 = std::move(dst);
     }
 
-    if (ov::element::Type_t::f32 == reference->get_element_type()) {
-        reference_f32 = ov::make_tensor(reference);
+    if (ov::element::f32 == in_reference.get_element_type()) {
+        reference_f32 = in_reference;
     } else {
-        ov::Tensor dst(ov::element::Type_t::f32, reference->get_shape());
-        ov::npuw::util::to_f32(ov::make_tensor(reference), dst);
+        ov::Tensor dst(ov::element::Type_t::f32, in_reference.get_shape());
+        ov::npuw::util::to_f32(in_reference, dst);
         reference_f32 = dst;
     }
 
@@ -51,13 +68,21 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
     }
 
     if (squared_error <= std::numeric_limits<double>::epsilon()) {
-        LOG_INFO("NRMSE loss: 0.0, threshold: " << m_threshold << ".");
-        LOG_INFO("PASS");
+        if (result != nullptr) {
+            *result = 0.0;
+        }
         return true;
     }
 
     double rmse = sqrt(squared_error / size);
-    NPUW_ASSERT(rmse >= 0.0);
+
+    if (rmse < 0.0) {
+        // Calculated RMSE metric is < 0.0, what is unexpected. So, return that tensors are unequal.
+        if (result != nullptr) {
+            *result = rmse;
+        }
+        return false;
+    }
 
     auto actual_min_max = std::minmax_element(actual_data, actual_data + size);
     auto reference_min_max = std::minmax_element(reference_data, reference_data + size);
@@ -66,9 +91,8 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
                            std::max(0.f, *actual_min_max.second) - std::min(0.f, *actual_min_max.first)});
 
     double nrmse = rmse / den;
-    LOG_INFO("NRMSE loss: " << nrmse << ", threshold: " << m_threshold << ".");
-
-    bool success = nrmse <= m_threshold;
-    LOG_INFO((success ? "PASS" : "FAIL"));
-    return success;
+    if (result != nullptr) {
+        *result = nrmse;
+    }
+    return nrmse <= m_threshold;
 }
@@ -15,8 +15,9 @@ namespace metrics {
 class NRMSE {
 public:
     explicit NRMSE(double threshold);
-    bool operator()(const ov::SoPtr<ov::ITensor>& backup_tensor, const ov::SoPtr<ov::ITensor>& original_tensor) const;
-
+    bool operator()(const ov::SoPtr<ov::ITensor>& backup_tensor,
+                    const ov::SoPtr<ov::ITensor>& original_tensor,
+                    double* result = nullptr) const;
 private:
     double m_threshold{};
 };