Commit 3ead1c7: tweaks

kylematoba committed Nov 29, 2024 (1 parent: 5329310)
Showing 4 changed files with 6 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/nanotron/models/llama.py
@@ -168,7 +168,8 @@ def __init__(
             bias=False,
             async_communication=tp_linear_async_communication and tp_mode is TensorParallelLinearMode.REDUCE_SCATTER,
         )
-        do_compile = True
+        # do_compile = True
+        do_compile = False
         # self.split_silu_mul = torch.compile(GLUActivation(config.hidden_act))
         self.split_silu_mul = GLUActivation(config.hidden_act)
         if do_compile:
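
For context: do_compile gates whether the GLU activation is wrapped in torch.compile, and this commit switches it off. A minimal sketch of the toggle pattern, assuming an illustrative GLUActivation body (the real one takes config.hidden_act):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class GLUActivation(nn.Module):
    # Illustrative stand-in: split the merged projection in two and
    # gate one half with SiLU, as in SwiGLU-style MLP blocks.
    def forward(self, merged_states: torch.Tensor) -> torch.Tensor:
        gate, up = torch.chunk(merged_states, 2, dim=-1)
        return F.silu(gate) * up

do_compile = False  # this commit turns compilation off
split_silu_mul = GLUActivation()
if do_compile:
    # torch.compile returns a wrapped module with the same call signature,
    # so downstream code is unchanged whichever branch runs.
    split_silu_mul = torch.compile(split_silu_mul)

out = split_silu_mul(torch.randn(2, 8))  # same behavior either way
```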
2 changes: 1 addition & 1 deletion src/nanotron/serialize/main.py
@@ -63,7 +63,7 @@ def save(
     sanity_checks: bool = True,
 ) -> None:
     assert isinstance(training_metadata, TrainingMetadata)
-    assert isinstance(valid_metadata, TrainingMetadata)
+    assert (valid_metadata is None) or isinstance(valid_metadata, TrainingMetadata)
 
     try:
         if should_save_config:
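
The relaxed assertion lets callers omit validation state, e.g. when saving a checkpoint before any validation has run. A self-contained illustration of the check, with a stub TrainingMetadata (the real dataclass carries more fields):

```python
import dataclasses
from typing import Optional

@dataclasses.dataclass
class TrainingMetadata:  # stub for the example; the field name is illustrative
    consumed_train_samples: int = 0

def check_valid_metadata(valid_metadata: Optional[TrainingMetadata]) -> None:
    # Accepts a real metadata object or None, rejects everything else.
    assert (valid_metadata is None) or isinstance(valid_metadata, TrainingMetadata)

check_valid_metadata(None)                # OK after this change
check_valid_metadata(TrainingMetadata())  # OK
# check_valid_metadata("oops")            # would raise AssertionError
```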
4 changes: 2 additions & 2 deletions src/nanotron/serialize/metadata.py
@@ -64,7 +64,7 @@ class CheckpointMetadata:
     tp: int
     dp: int
     train_meta: TrainingMetadata
-    valid_meta: TrainingMetadata
+    valid_meta: Optional[TrainingMetadata]
 
 
 @dataclasses.dataclass
@@ -130,7 +130,7 @@ def save_meta(parallel_context: ParallelContext,
               training_metadata: TrainingMetadata,
               valid_metadata: TrainingMetadata):
     assert isinstance(training_metadata, TrainingMetadata)
-    assert isinstance(valid_metadata, TrainingMetadata)
+    assert (valid_metadata is None) or isinstance(valid_metadata, TrainingMetadata)
 
     if dist.get_rank(parallel_context.world_pg) != 0:
         return
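
The annotation change surfaces the same contract to type checkers: valid_meta may be absent. A trimmed sketch of the resulting dataclass shape (only the fields shown in the diff; string forward references stand in for the real types):

```python
import dataclasses
from typing import Optional

@dataclasses.dataclass
class CheckpointMetadata:
    tp: int
    dp: int
    train_meta: "TrainingMetadata"            # always present
    valid_meta: Optional["TrainingMetadata"]  # None when no validation state was saved
```

The loading side in trainer.py below applies the matching None-tolerant assert when resuming from such a checkpoint.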
3 changes: 1 addition & 2 deletions src/nanotron/trainer.py
@@ -229,7 +229,7 @@ def __init__(
                 parallel_context=self.parallel_context, root_folder=self.init_checkpoint_path
             )
             assert isinstance(checkpoint_metadata.train_meta, TrainingMetadata)
-            assert isinstance(checkpoint_metadata.valid_meta, TrainingMetadata)
+            assert (checkpoint_metadata.valid_meta is None) or isinstance(checkpoint_metadata.valid_meta, TrainingMetadata)
 
             log_rank(str(checkpoint_metadata), logger=logger, level=logging.INFO, rank=0)
             self.metadata: TrainingMetadata = checkpoint_metadata.train_meta
@@ -688,7 +688,6 @@ def train_step_logs(

         if dist.get_rank(self.parallel_context.world_pg) in self.logger_ranks:
             assert self.loggerwriter is not None, "loggerwriter should be defined on logger ranks"
-
             lr = self.lr_scheduler.get_last_lr()[0]
 
             log_entries = [
