Skip to content

Commit

Permalink
added train_samples keyword for compliance check (#645)
Browse files Browse the repository at this point in the history
* added train_samples keyword for compliance check

* added cache clear logging
  • Loading branch information
anmolgupt authored May 30, 2024
1 parent accfadf commit 2ea9ba1
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions large_language_model/megatron-lm/megatron/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def pretrain(train_valid_test_dataset_provider,
args_defaults: a dictionary from argument-name to argument-value. It
is used to set already-parsed arguments.
"""
# The reference implementation does not currently clear the cache,
# but submissions are required to do so.
mllogger.event(key=mllogger.constants.CACHE_CLEAR, value=True)
mllogger.start(key=mllogger.constants.INIT_START, sync=False)

# Initialize and get arguments, timers, and Tensorboard writer.
Expand Down Expand Up @@ -232,6 +235,9 @@ def pretrain(train_valid_test_dataset_provider,
mllogger.event(key="trained_samples",
value=(args.consumed_train_samples - args.ext_lr_steps) * args.seq_length,
sync=False)
mllogger.event(key="train_samples",
value=(args.consumed_train_samples - args.ext_lr_steps) * args.seq_length,
sync=False)
mllogger.end(key=mllogger.constants.BLOCK_STOP,
metadata={'first_epoch_num': 0},
sync=False)
Expand Down Expand Up @@ -813,6 +819,9 @@ def train(forward_step_func, model, optimizer, opt_param_scheduler,
mllogger.event(key="trained_samples",
value=(args.consumed_train_samples - args.ext_lr_steps) * args.seq_length,
sync=False)
mllogger.event(key="train_samples",
value=(args.consumed_train_samples - args.ext_lr_steps) * args.seq_length,
sync=False)
if not saved_checkpoint:
save_checkpoint_and_time(iteration, model, optimizer,
opt_param_scheduler)
Expand Down

0 comments on commit 2ea9ba1

Please sign in to comment.