Skip to content

Commit

Permalink
NPUW Deref: clean-up CPU changes, move detach to eval() - draft
Browse files Browse the repository at this point in the history
  • Loading branch information
dmatveev committed Nov 27, 2024
1 parent c2ad6fe commit ef5fde0
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 38 deletions.
1 change: 0 additions & 1 deletion src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,6 @@ void ov::npuw::CompiledModel::detach_memory() {
// proto_comp_model_desc.compiled_model = {}; // Shouldn't be here, CPU only
}
}
m_weights_bank->detach();
LOG_INFO("Done");
}

Expand Down
40 changes: 6 additions & 34 deletions src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,22 +110,13 @@ ov::Tensor Bank::eval_and_alloc(const LazyTensor& tensor, Bank::DeviceBank &dban

std::unique_lock<std::mutex> guard(dbank.mutex);
if (device_for_alloc == "CPU") {
// REVERTME:{{{
// Store a copy of the tensor memory even on CPU - to simulate
// bank load.

ov::Tensor new_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
dbank.storage[tensor] = new_tensor;
guard.unlock();

transformed_tensor.copy_to(new_tensor);
return new_tensor;
// Old code here:
// m_device_bank[device_for_alloc][tensor] = transformed_tensor;
// return transformed_tensor;
// REVERTME:}}}
dbank.storage[tensor] = transformed_tensor;
return transformed_tensor;
}

// Non-CPU case: detach the evaluated LazyTensor from its memory
const_cast<LazyTensor&>(tensor).detach();

ov::SoPtr<ov::ITensor> remote_tensor;
ov::Tensor allocated_tensor;

Expand All @@ -134,7 +125,7 @@ ov::Tensor Bank::eval_and_alloc(const LazyTensor& tensor, Bank::DeviceBank &dban
remote_ctx->create_host_tensor(transformed_tensor.get_element_type(), transformed_tensor.get_shape());
allocated_tensor = ov::make_tensor(remote_tensor);
dbank.storage[tensor] = allocated_tensor;
guard.unlock();
guard.unlock(); // Unlock the guard, map update is done - copy can continue in parallel

transformed_tensor.copy_to(allocated_tensor);
return allocated_tensor;
Expand All @@ -150,25 +141,6 @@ bool Bank::is_remote(const LazyTensor& tensor) const {
return false;
}

// Release the original (source) buffers held by every LazyTensor key in the
// bank. Called after allocation so the evaluated weights no longer pin the
// initial memory.
void Bank::detach() {
    std::lock_guard<std::mutex> guard(m_mutex);
    for (auto&& device_bank_pair : m_device_banks) {
        auto& dev_bank = device_bank_pair.second;

        // FIXME: Uncomment it later (after the CPU copy revert)
        // const auto &device_str = device_bank_pair.first;
        // if (device_str == "CPU") {
        //     // CPU memory is non-detachable
        //     continue;
        // }

        // Per-device lock: storage may be touched concurrently by eval_and_alloc().
        std::lock_guard<std::mutex> dev_guard(dev_bank.mutex);
        for (auto&& entry : dev_bank.storage) {
            // Keys are stored const in the map; detach() mutates only the
            // LazyTensor's internal source reference, not its identity/hash.
            const_cast<LazyTensor&>(entry.first).detach();
        }
    }
}

std::shared_ptr<Bank> BankManager::getBank(const std::string& bank_name,
const std::shared_ptr<const ov::ICore>& core,
const std::string& alloc_device) {
Expand Down
3 changes: 0 additions & 3 deletions src/plugins/intel_npu/src/plugin/npuw/weights_bank.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ class Bank {
void evaluate_and_allocate();
bool is_remote(const LazyTensor& tensor) const;

// Drop references to the original buffers, if any
void detach();

private:
// Bank for specified device and their allocated memory
struct DeviceBank {
Expand Down

0 comments on commit ef5fde0

Please sign in to comment.