fix: Accumulate loss value for metrics while gradient accumulation
lRomul committed Jul 2, 2023
1 parent 4522540 commit c6b288e
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions examples/cifar_advanced.py
@@ -125,14 +125,15 @@ def train_step(self, batch, state) -> dict:
         self.optimizer.zero_grad()
 
         # Gradient accumulation
+        loss_value = 0
         for i, chunk_batch in enumerate(deep_chunk(batch, self.iter_size)):
             input, target = deep_to(chunk_batch, self.device, non_blocking=True)
             with torch.cuda.amp.autocast(enabled=self.amp):
                 prediction = self.nn_module(input)
                 loss = self.loss(prediction, target)
                 loss = loss / self.iter_size
-
             self.grad_scaler.scale(loss).backward()
+            loss_value += loss.item()
 
         self.grad_scaler.step(self.optimizer)
         self.grad_scaler.update()
@@ -143,7 +144,7 @@ def train_step(self, batch, state) -> dict:
         return {
             'prediction': prediction,
             'target': target,
-            'loss': loss.item()
+            'loss': loss_value
         }
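
The change only affects how the loss is reported: gradients are still scaled by self.iter_size and accumulated exactly as before, but the returned 'loss' now sums the scaled chunk losses instead of reporting only the last chunk's value. A minimal standalone sketch of the same pattern (hypothetical names; plain PyTorch rather than the argus deep_chunk/grad_scaler helpers) illustrates why this sum approximates the loss of a single full-batch step when the criterion uses mean reduction and the chunks are equal size:

# Hypothetical sketch of gradient accumulation with loss reporting,
# assuming a toy model, criterion, and equal-sized chunks (not the argus API).
import torch
import torch.nn as nn

model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss()        # mean reduction by default
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
iter_size = 4                            # number of chunks per optimizer step

def train_step(inputs, targets):
    optimizer.zero_grad()
    loss_value = 0.0                     # scalar accumulated for metrics only
    for input_chunk, target_chunk in zip(inputs.chunk(iter_size),
                                         targets.chunk(iter_size)):
        loss = criterion(model(input_chunk), target_chunk)
        loss = loss / iter_size          # scale so summed gradients match one big batch
        loss.backward()                  # gradients accumulate in .grad across chunks
        loss_value += loss.item()        # sum of scaled chunk losses ~= full-batch mean loss
    optimizer.step()
    return loss_value                    # before the fix, only the last chunk's loss was reported

x, y = torch.randn(32, 10), torch.randint(0, 2, (32,))
print(train_step(x, y))

With mean-reduction losses and equal chunk sizes, the accumulated value matches what one un-chunked forward pass would report, which is what the metrics expect.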


