Skip to content

Commit

Permalink
Add an option to disable manual GC in validation (#7467) (#7476)
Browse files Browse the repository at this point in the history
Signed-off-by: Sangkug Lym <[email protected]>
Co-authored-by: Sangkug Lym <[email protected]>
  • Loading branch information
2 people authored and yaoyu-33 committed Oct 13, 2023
1 parent b6fecc5 commit 292d232
Showing 1 changed file with 5 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True):
}

self.gc_interval = cfg.get('gc_interval', 0)
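# Do manual garbage collection during the validation routine when gc_interval > 0.
# Set the environment variable NEMO_MANUAL_GC_IN_VALIDATION=0 to disable it (default: 1, enabled).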
self.gc_in_validation = bool(int(os.getenv("NEMO_MANUAL_GC_IN_VALIDATION", 1)))
assert self.gc_interval >= 0, "gc_interval should be an integer value larger than or equal to 0."
# If gc_interval > 0, memory garbage collection is manually controlled.
# The automatic garbage collector should be disabled before training starts.
Expand Down Expand Up @@ -284,12 +286,12 @@ def on_train_start(self) -> None:

def on_validation_start(self) -> None:
super().on_validation_start()
if self.gc_interval > 0:
if self.gc_interval > 0 and self.gc_in_validation:
gc.collect()

def on_validation_end(self) -> None:
super().on_validation_end()
if self.gc_interval > 0:
if self.gc_interval > 0 and self.gc_in_validation:
gc.collect()

def _build_vocab(self):
Expand Down Expand Up @@ -447,7 +449,7 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus
def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None:
super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx)

if self.gc_interval > 0:
if self.gc_interval > 0 and self.gc_in_validation:
if self.validation_global_step % self.gc_interval == 0:
gc.collect()
self.validation_global_step += 1
Expand Down

0 comments on commit 292d232

Please sign in to comment.