[FSU] Modify the condition of LoadTensors
The number of currently loaded tensors can now be queried through the
getNumLoadedTensors function. Note that a single LoadTensors call
increments getNumLoadedTensors by two.

Added a leave_lookahead argument to LoadTensors. The forwarding path now
computes leave_lookahead from getNumLoadedTensors and passes it to
LoadTensors, so only the tensors needed to refill the lookahead window are
requested.
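
To make the window arithmetic concrete, here is a standalone sketch
(illustrative only; the factor of two and the lookahead window follow the
description above, not an exact nntrainer API):

#include <iostream>

// Toy illustration: the counter is bumped twice per loaded execution order,
// so (counter + 1) / 2 recovers the number of orders currently resident,
// and leave_lookahead is whatever is left of the configured window.
int main() {
  const unsigned int lookahead = 4;
  const unsigned int samples[] = {0, 2, 4, 8}; // example counter readings
  for (unsigned int counter : samples) {
    unsigned int resident = (counter + 1) / 2;
    unsigned int leave_lookahead = lookahead - resident;
    std::cout << "counter=" << counter << " -> resident=" << resident
              << ", leave_lookahead=" << leave_lookahead << '\n';
  }
  return 0;
}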

Signed-off-by: SeoHyungjun <[email protected]>
SeoHyungjun committed Dec 27, 2024
1 parent 24a868d commit fb5d685
Showing 13 changed files with 74 additions and 32 deletions.
12 changes: 10 additions & 2 deletions nntrainer/graph/network_graph.cpp
@@ -1562,8 +1562,8 @@ void NetworkGraph::flushCacheExcept(unsigned int order) {
tensor_manager->flushCacheExcept(order);
}

void NetworkGraph::LoadTensors(unsigned int order) {
tensor_manager->LoadTensors(order);
void NetworkGraph::LoadTensors(unsigned int order, unsigned int leave_lookahead) {
tensor_manager->LoadTensors(order, leave_lookahead);
}

bool NetworkGraph::checkLoadComplete(unsigned int order) {
@@ -1601,4 +1601,12 @@ void NetworkGraph::resetLossScale(float scale) {
}
}

unsigned int NetworkGraph::getNumLoadedWeightPoolTensors() {
return tensor_manager->getNumLoadedWeightPoolTensors();
}

unsigned int NetworkGraph::getNumLoadedTensorPoolTensors() {
return tensor_manager->getNumLoadedTensorPoolTensors();
}

} /* namespace nntrainer */
5 changes: 4 additions & 1 deletion nntrainer/graph/network_graph.h
@@ -456,7 +456,7 @@ class NetworkGraph {
*
* @param order execution order
* @param leave_lookahead remaining lookahead; tensors for orders
* [order, order + leave_lookahead] are requested
*/
void LoadTensors(const unsigned int order);
void LoadTensors(const unsigned int order, unsigned int leave_lookahead = 0);

/**
* @brief check data of order is loaded
@@ -501,6 +501,9 @@
*/
bool isMixedPrecision() { return (!istrequal(tensor_dtype[1], "FP32")); }

/** @brief get the number of loaded tensors in the weight pool */
unsigned int getNumLoadedWeightPoolTensors();

/** @brief get the number of loaded tensors in the tensor pool */
unsigned int getNumLoadedTensorPoolTensors();

private:
std::map<std::string, std::string> sub_in_out; /** This is map to identify
input and output layer name of subgraph */
32 changes: 9 additions & 23 deletions nntrainer/models/neuralnet.cpp
@@ -383,31 +383,17 @@ sharedConstTensors NeuralNetwork::forwarding(
the forwarding, ask load tensors for next n layers.
**/

if (f == 0)
model_graph.LoadTensors(f);

if (model_graph.checkLoadComplete(f)) {
node->forwarding(training);
ml_logd("Forwarding is done %d : %s", f, node->getName().c_str());

unsigned int lookahead =
std::get<props::MemorySwapLookahead>(model_flex_props);

if (lookahead != 0) {
if ((f) % (lookahead + 1) == lookahead - 1) {
std::cout << "request load tensor : " << f + lookahead + 1
<< std::endl;
ml_logd("request load tensor for %d", f + 1);
model_graph.LoadTensors((f / (lookahead + 1) + 1) *
(lookahead + 1));
}
} else {
model_graph.LoadTensors(f);
}
unsigned int lookahead =
std::get<props::MemorySwapLookahead>(model_flex_props);

if (f != 0)
model_graph.UnloadTensors(f);
if ((model_graph.getNumLoadedWeightPoolTensors() + 1) / 2 >= lookahead + 1) {
model_graph.checkUnloadComplete(f - 1);
}
model_graph.LoadTensors(f, lookahead - (model_graph.getNumLoadedWeightPoolTensors() + 1) / 2);

model_graph.checkLoadComplete(f);
node->forwarding(training);
model_graph.UnloadTensors(f);
}
};
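
For intuition about the rewrite above, here is a small self-contained
walk-through of the window the new loop maintains (a behavioral sketch under
the assumptions stated in the comments, not nntrainer code):

#include <iostream>
#include <set>

// Behavioral sketch of the rewritten forwarding loop. Assumptions: every
// resident execution order counts twice in the weight-pool counter, and an
// async unload normally completes before the next iteration.
int main() {
  const unsigned int lookahead = 2;
  std::set<unsigned int> resident; // execution orders currently in memory
  auto counter = [&] { return 2 * static_cast<unsigned int>(resident.size()); };

  for (unsigned int f = 0; f < 6; ++f) {
    // Guard from the diff: if the window is full, force the previous unload
    // to finish first (never fires here since unloads complete immediately).
    if ((counter() + 1) / 2 >= lookahead + 1)
      resident.erase(f - 1); // checkUnloadComplete(f - 1)
    unsigned int leave = lookahead - (counter() + 1) / 2; // leave_lookahead
    for (unsigned int i = f; i <= f + leave; ++i)
      resident.insert(i); // LoadTensors(f, leave): orders f .. f + leave
    std::cout << "f=" << f << " leave_lookahead=" << leave
              << " resident=" << resident.size() << '\n';
    resident.erase(f); // UnloadTensors(f), modeled as completing immediately
  }
  return 0;
}

In steady state the loop keeps about lookahead orders resident and requests
only the shortfall, where the old code requested a full window every
lookahead + 1 steps.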

4 changes: 4 additions & 0 deletions nntrainer/tensor/cache_loader.cpp
@@ -124,4 +124,8 @@ int CacheLoader::cancelAsync(int id) {
return ML_ERROR_NONE;
}

unsigned int CacheLoader::getNumLoadedTensors() {
return pool->getNumLoadedTensors();
}

} // namespace nntrainer
2 changes: 2 additions & 0 deletions nntrainer/tensor/cache_loader.h
@@ -115,6 +115,8 @@ class CacheLoader {
*/
virtual int cancelAsync(int id);

/** @brief get the number of tensors currently loaded by the cache pool */
virtual unsigned int getNumLoadedTensors();

private:
std::shared_ptr<CachePool> pool; /**< cache pool */
TaskExecutor *load_task_executor; /**< task executor */
4 changes: 4 additions & 0 deletions nntrainer/tensor/cache_pool.cpp
@@ -321,4 +321,8 @@ void CachePool::unloadActives() {
elem->swapOut();
}

unsigned int CachePool::getNumLoadedTensors() {
return swap_device->getNumLoadedTensors();
}

} // namespace nntrainer
2 changes: 2 additions & 0 deletions nntrainer/tensor/cache_pool.h
@@ -188,6 +188,8 @@ class CachePool : public MemoryPool {
*/
virtual std::string getName() { return name; }

/** @brief get the number of tensors currently loaded from the swap device */
virtual unsigned int getNumLoadedTensors();

protected:
/**
* @brief validate cache element
15 changes: 12 additions & 3 deletions nntrainer/tensor/manager.cpp
@@ -787,7 +787,7 @@ bool Manager::checkUnloadComplete(unsigned int order) {
return true;
}

void Manager::LoadTensors(unsigned int order) {
void Manager::LoadTensors(unsigned int order, unsigned int leave_lookahead) {
auto loadTensorsAsync = [&](TensorPool &pool, unsigned int order) {
return pool.loadCacheExecAsync(
order, [&](int id, TaskExecutor::CompleteStatus status) {
@@ -813,9 +813,10 @@ void Manager::LoadTensors(unsigned int order) {
async_load_tensor[o] = std::make_tuple(load_weight, load_tensor);
};

for (unsigned int i = order; i < order + swap_lookahead + 1; ++i) {
if (i <= max_exec_order)
for (unsigned int i = order; i < order + leave_lookahead + 1; ++i) {
if (i <= max_exec_order) {
enqueTasks(i);
}
}
}

@@ -896,4 +897,12 @@ void Manager::finalizeTensorPool(TensorPool &pool, unsigned int start,
pool.finalize(BasicPlanner(), start, end);
}

unsigned int Manager::getNumLoadedWeightPoolTensors() {
return weight_pool.getNumLoadedTensors();
}

unsigned int Manager::getNumLoadedTensorPoolTensors() {
return tensor_pool.getNumLoadedTensors();
}

} // namespace nntrainer
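
A quick standalone check of the range the rewritten loop enqueues (made-up
values; order, leave_lookahead, and max_exec_order stand in for the members
used above):

#include <iostream>

// The loop requests orders [order, order + leave_lookahead], clamped to
// max_exec_order.
int main() {
  const unsigned int order = 5, leave_lookahead = 2, max_exec_order = 6;
  for (unsigned int i = order; i < order + leave_lookahead + 1; ++i)
    if (i <= max_exec_order)
      std::cout << "enqueue order " << i << '\n'; // prints 5 and 6; 7 is clamped
  return 0;
}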
6 changes: 5 additions & 1 deletion nntrainer/tensor/manager.h
@@ -494,7 +494,7 @@ class Manager {
* @note preloading loads execution order data asynchronously,
* for lookahead size.
* @param leave_lookahead remaining lookahead; orders
* [order, order + leave_lookahead] are enqueued for loading
*/
void LoadTensors(unsigned int order);
void LoadTensors(unsigned int order, unsigned int leave_lookahead = 0);

/**
* @brief check completion of load data for the execution order
@@ -537,6 +537,10 @@
*/
bool isMixedPrecision() { return !istrequal(tensor_dtype[0], "FP32"); }

/** @brief get the number of loaded tensors in the weight pool */
unsigned int getNumLoadedWeightPoolTensors();

/** @brief get the number of loaded tensors in the tensor pool */
unsigned int getNumLoadedTensorPoolTensors();

private:
/** @todo: merge this list to one */
std::vector<std::unique_ptr<Weight>> weights_v2; /**< weights for the layers
9 changes: 9 additions & 0 deletions nntrainer/tensor/swap_device.cpp
@@ -73,6 +73,8 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) {
void *buf = static_cast<void *>(ptr + diff);
mapped[buf] = std::make_tuple(ptr, len, offset, (ssize_t)size);

++num_loaded_tensors;

return buf;
#else
off_t off;
@@ -95,6 +97,8 @@ void *SwapDevice::getBuffer(off_t offset, size_t size, bool alloc_only) {

allocated[ptr] = std::make_pair(offset, (ssize_t)size);

++num_loaded_tensors;

return ptr;
#endif
}
@@ -164,6 +168,11 @@ void SwapDevice::putBuffer(void *ptr, bool dealloc_only) {
#endif

#endif
--num_loaded_tensors;
}

unsigned int SwapDevice::getNumLoadedTensors() {
return num_loaded_tensors;
}

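
The counter pairs the two sides of the buffer lifecycle: getBuffer()
increments it when a region is mapped or allocated in, and putBuffer()
decrements it on release; the new getNumLoaded* accessors simply relay that
value up the chain (SwapDevice → CachePool → CacheLoader → TensorPool →
Manager → NetworkGraph). A minimal model of the invariant (a toy stand-in,
not the real SwapDevice):

#include <cassert>

// The counter always equals the number of buffers handed out by getBuffer()
// and not yet returned via putBuffer().
class ToySwapDevice {
public:
  void *getBuffer(unsigned int size) {
    ++num_loaded_tensors;   // mirrors the increment added above
    return new char[size];  // stands in for the mmap()/malloc() paths
  }
  void putBuffer(void *ptr) {
    delete[] static_cast<char *>(ptr); // stands in for munmap()/free()
    --num_loaded_tensors;              // mirrors the decrement added above
  }
  unsigned int getNumLoadedTensors() const { return num_loaded_tensors; }

private:
  unsigned int num_loaded_tensors = 0;
};

int main() {
  ToySwapDevice dev;
  void *a = dev.getBuffer(16);
  void *b = dev.getBuffer(32);
  assert(dev.getNumLoadedTensors() == 2);
  dev.putBuffer(a);
  dev.putBuffer(b);
  assert(dev.getNumLoadedTensors() == 0);
  return 0;
}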
9 changes: 7 additions & 2 deletions nntrainer/tensor/swap_device.h
@@ -48,15 +48,17 @@ class SwapDevice {
*/
explicit SwapDevice(const std::string &name) :
dev_path(swap_device_default_path + name),
fd(-1) {}
fd(-1),
num_loaded_tensors(0) {}

/**
* @brief SwapDevice default constructor
*
*/
explicit SwapDevice(const std::string &path, const std::string &name) :
dev_path(path + "/" + name),
fd(-1) {}
fd(-1),
num_loaded_tensors(0) {}

/**
* @brief SwapDevice destructor
@@ -114,10 +116,13 @@
*/
const std::string getDevicePath() const { return dev_path; }

/** @brief get the number of buffers currently loaded from this device */
unsigned int getNumLoadedTensors();

private:
const std::string dev_path; /**< device path */
int fd; /**< device file description */

unsigned int num_loaded_tensors; /**< number of buffers currently loaded */
#ifdef USE_MMAP
std::map<void *, std::tuple<void *, size_t, off_t, ssize_t>>
mapped; /**< <pointer, <orig_pointer, size, offset, original size>> */
4 changes: 4 additions & 0 deletions nntrainer/tensor/tensor_pool.cpp
@@ -490,4 +490,8 @@ void TensorPool::loadCacheCancel(int id) {
cache_loader->cancelAsync(id);
}

unsigned int TensorPool::getNumLoadedTensors() {
return cache_loader->getNumLoadedTensors();
}

} // namespace nntrainer
2 changes: 2 additions & 0 deletions nntrainer/tensor/tensor_pool.h
@@ -304,6 +304,8 @@
*/
void loadCacheCancel(int id);

/** @brief get the number of tensors currently loaded through the cache loader */
unsigned int getNumLoadedTensors();

private:
/**
* @brief Source tensor detailed specification
