
Commit

Remove finetuning recipes for Long Context since there is no use case (#10703)

Signed-off-by: Boxiang Wang <[email protected]>
Co-authored-by: Eric Harper <[email protected]>
BoxiangW and ericharper authored Oct 3, 2024
1 parent 810aaea commit 3d1b2c7
Showing 8 changed files with 0 additions and 356 deletions.
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_70b_16k.py
@@ -129,48 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 2,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 70B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_70b_16k
$ nemo llm finetune --factory "llama3_70b_16k(num_nodes=4, name='my_70b_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_70b_16k_finetune", num_nodes=4)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning the large 70B model with longer sequences (16k).
It uses the SQuAD dataset adapted for 16k sequence length. Be aware that this configuration
requires substantial computational resources.
"""
recipe = llama3_70b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
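
If a 16k-sequence fine-tune of Llama3 70B is still needed after this removal, an equivalent configuration can be assembled by hand from the surviving base recipe, mirroring the deleted body above. A minimal sketch, assuming the llama3_70b_16k module still exposes the model() and trainer() helpers its pretrain recipe uses, and with import paths inferred rather than shown in this diff:

# Hedged sketch: rebuild the deleted llama3_70b_16k fine-tuning setup manually.
# Import paths and the llama3_70b_16k.model()/trainer() helpers are assumptions
# inferred from the removed code, not confirmed by this diff.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import llama3_70b, llama3_70b_16k

# Start from the base 70B fine-tuning recipe, then swap in the 16k-specific
# model, trainer, and data settings, as the removed finetune_recipe did.
recipe = llama3_70b.finetune_recipe(name="my_70b_16k_finetune", num_nodes=2, num_gpus_per_node=8)
recipe.model = llama3_70b_16k.model()
recipe.trainer = llama3_70b_16k.trainer(num_nodes=2, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)
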
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_70b_64k.py
@@ -132,48 +132,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 32,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 70B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_70b_64k
$ nemo llm finetune --factory "llama3_70b_64k(num_nodes=32, name='my_70b_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_70b_64k_finetune", num_nodes=32)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning the large 70B model with long sequences (64k).
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration
requires extensive computational resources due to the model size and extended sequence length.
"""
recipe = llama3_70b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
44 changes: 0 additions & 44 deletions nemo/collections/llm/recipes/llama3_8b_16k.py
@@ -128,47 +128,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 8B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_8b_16k
$ nemo llm finetune --factory "llama3_8b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_8b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with longer sequences (16k) compared to the standard 8k version.
It uses the SQuAD dataset adapted for 16k sequence length.
"""
recipe = llama3_8b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
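
With the llama3_8b_16k factory gone, the Python API example from the docstring above can be replaced by building the recipe from the base llama3_8b module and launching it with NeMo-Run. A hedged sketch; the executor settings are illustrative assumptions taken from the NeMo 2.0 quickstart, not from this diff:

# Hedged sketch: assemble and launch a Llama3 8B 16k fine-tune without the removed factory.
# run.LocalExecutor / run.run usage follows the NeMo-Run quickstart and is an assumption here.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import llama3_8b, llama3_8b_16k

recipe = llama3_8b.finetune_recipe(name="llama3_8b_16k_finetune", num_nodes=1, num_gpus_per_node=8)
recipe.model = llama3_8b_16k.model()
recipe.trainer = llama3_8b_16k.trainer(num_nodes=1, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

if __name__ == "__main__":
    # Single-node launch via torchrun; a cluster executor would replace this for multi-node runs.
    executor = run.LocalExecutor(ntasks_per_node=8, launcher="torchrun")
    run.run(recipe, executor=executor)
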
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_8b_64k.py
@@ -129,48 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 8B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_8b_64k
$ nemo llm finetune --factory "llama3_8b_64k(num_nodes=2, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_8b_64k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard 8k version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the extended sequence length.
"""
recipe = llama3_8b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
44 changes: 0 additions & 44 deletions nemo/collections/llm/recipes/mixtral_8x3b_16k.py
@@ -130,47 +130,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x3B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x3b_16k
$ nemo llm finetune --factory "mixtral_8x3b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x3b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with longer sequences (16k) compared to the standard version.
It uses the SQuAD dataset adapted for 16k sequence length.
"""
recipe = mixtral_8x3b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/mixtral_8x3b_64k.py
@@ -131,48 +131,3 @@ def pretrain_recipe(
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 8,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x3B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x3b_64k
$ nemo llm finetune --factory "mixtral_8x3b_64k(num_nodes=8, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x3b_64k_finetune", num_nodes=8)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the extended sequence length.
"""
recipe = mixtral_8x3b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
43 changes: 0 additions & 43 deletions nemo/collections/llm/recipes/mixtral_8x7b_16k.py
@@ -129,46 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 2,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x7B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x7b_16k
$ nemo llm finetune --factory "mixtral_8x7b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x7b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe uses the SQuAD dataset for fine-tuning.
"""
recipe = mixtral_8x7b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/mixtral_8x7b_64k.py
@@ -133,48 +133,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 16,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x7B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x7b_64k
$ nemo llm finetune --factory "mixtral_8x7b_64k(num_nodes=16, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x7b_64k_finetune", num_nodes=16)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the model size and extended sequence length.
"""
recipe = mixtral_8x7b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
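
The same override pattern covers the largest removed configuration, Mixtral 8x7B at 64k sequence length, which the docstring above notes requires substantial resources (16 nodes with 8 GPUs each by default). A minimal sketch under the same module-layout assumptions as the earlier examples:

# Hedged sketch: the deleted mixtral_8x7b_64k fine-tuning setup, rebuilt from the base recipe.
# Import paths and the mixtral_8x7b_64k.model()/trainer() helpers are assumptions.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import mixtral_8x7b, mixtral_8x7b_64k

recipe = mixtral_8x7b.finetune_recipe(name="my_64k_finetune", num_nodes=16, num_gpus_per_node=8)
recipe.model = mixtral_8x7b_64k.model()
recipe.trainer = mixtral_8x7b_64k.trainer(num_nodes=16, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)
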
