From c6b288e65ddccba3cc24f0c96f13dc86ae7c086f Mon Sep 17 00:00:00 2001
From: Ruslan Baikulov
Date: Sun, 2 Jul 2023 22:51:56 +0300
Subject: [PATCH] fix: Accumulate loss value for metrics during gradient
 accumulation

---
 examples/cifar_advanced.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/cifar_advanced.py b/examples/cifar_advanced.py
index b8169ec..f514de2 100644
--- a/examples/cifar_advanced.py
+++ b/examples/cifar_advanced.py
@@ -125,14 +125,16 @@ def train_step(self, batch, state) -> dict:
         self.optimizer.zero_grad()
 
         # Gradient accumulation
+        loss_value = 0
         for i, chunk_batch in enumerate(deep_chunk(batch, self.iter_size)):
             input, target = deep_to(chunk_batch, self.device, non_blocking=True)
             with torch.cuda.amp.autocast(enabled=self.amp):
                 prediction = self.nn_module(input)
                 loss = self.loss(prediction, target)
                 loss = loss / self.iter_size
             self.grad_scaler.scale(loss).backward()
+            loss_value += loss.item()
 
         self.grad_scaler.step(self.optimizer)
         self.grad_scaler.update()
 
@@ -143,7 +145,7 @@ def train_step(self, batch, state) -> dict:
         return {
             'prediction': prediction,
             'target': target,
-            'loss': loss.item()
+            'loss': loss_value
         }
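
Note on the resulting pattern: after this change, train_step still backpropagates the
scaled per-chunk loss, and additionally sums loss.item() across chunks, so the reported
'loss' value reflects the whole batch rather than only the last chunk. The sketch below
is a minimal standalone illustration of that pattern, not the project's code: the
function name, the plain (inputs, targets) batch, and the use of Tensor.chunk in place
of the deep_chunk/deep_to helpers are assumptions made for the example.

    import torch

    def train_step_sketch(model, optimizer, loss_fn, grad_scaler,
                          batch, iter_size, device, amp=True):
        """One training step with gradient accumulation over iter_size chunks."""
        model.train()
        optimizer.zero_grad()

        inputs, targets = batch          # assumed: a plain (inputs, targets) tensor pair
        loss_value = 0.0
        for input_chunk, target_chunk in zip(inputs.chunk(iter_size),
                                             targets.chunk(iter_size)):
            input_chunk = input_chunk.to(device, non_blocking=True)
            target_chunk = target_chunk.to(device, non_blocking=True)
            with torch.cuda.amp.autocast(enabled=amp):
                prediction = model(input_chunk)
                loss = loss_fn(prediction, target_chunk)
                loss = loss / iter_size              # average over chunks
            grad_scaler.scale(loss).backward()       # gradients accumulate across chunks
            loss_value += loss.item()                # accumulate for metrics (the fix)

        grad_scaler.step(optimizer)
        grad_scaler.update()
        return loss_value

With iter_size=1 this reduces to an ordinary AMP training step. The grad_scaler is
assumed to be constructed as torch.cuda.amp.GradScaler(enabled=amp), mirroring the
enabled=self.amp flag the patch shows on autocast.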