
Commit

Remove finetuning recipes for Long Context since there is no use case (#10703)

Signed-off-by: Boxiang Wang <[email protected]>
Co-authored-by: Eric Harper <[email protected]>
BoxiangW and ericharper authored Oct 3, 2024
1 parent 810aaea commit 3d1b2c7
Showing 8 changed files with 0 additions and 356 deletions.
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_70b_16k.py
@@ -129,48 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 2,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 70B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_70b_16k
$ nemo llm finetune --factory "llama3_70b_16k(num_nodes=4, name='my_70b_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_70b_16k_finetune", num_nodes=4)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning the large 70B model with longer sequences (16k).
It uses the SQuAD dataset adapted for 16k sequence length. Be aware that this configuration
requires substantial computational resources.
"""
recipe = llama3_70b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
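
If a 16k-sequence fine-tune of Llama3 70B is still needed after this removal, an equivalent configuration can be assembled by hand from the surviving base recipe, mirroring the deleted body above. A minimal sketch, assuming the llama3_70b_16k module still exposes the model() and trainer() helpers its pretrain recipe uses, and with import paths inferred rather than shown in this diff:

# Hedged sketch: rebuild the deleted llama3_70b_16k fine-tuning setup manually.
# Import paths and the llama3_70b_16k.model()/trainer() helpers are assumptions
# inferred from the removed code, not confirmed by this diff.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import llama3_70b, llama3_70b_16k

# Start from the base 70B fine-tuning recipe, then swap in the 16k-specific
# model, trainer, and data settings, as the removed finetune_recipe did.
recipe = llama3_70b.finetune_recipe(name="my_70b_16k_finetune", num_nodes=2, num_gpus_per_node=8)
recipe.model = llama3_70b_16k.model()
recipe.trainer = llama3_70b_16k.trainer(num_nodes=2, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)
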
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_70b_64k.py
@@ -132,48 +132,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 32,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 70B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_70b_64k
$ nemo llm finetune --factory "llama3_70b_64k(num_nodes=32, name='my_70b_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_70b_64k_finetune", num_nodes=32)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning the large 70B model with long sequences (64k).
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration
requires extensive computational resources due to the model size and extended sequence length.
"""
recipe = llama3_70b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
44 changes: 0 additions & 44 deletions nemo/collections/llm/recipes/llama3_8b_16k.py
@@ -128,47 +128,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 8B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_8b_16k
$ nemo llm finetune --factory "llama3_8b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_8b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with longer sequences (16k) compared to the standard 8k version.
It uses the SQuAD dataset adapted for 16k sequence length.
"""
recipe = llama3_8b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
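
With the llama3_8b_16k factory gone, the Python API example from the docstring above can be replaced by building the recipe from the base llama3_8b module and launching it with NeMo-Run. A hedged sketch; the executor settings are illustrative assumptions taken from the NeMo 2.0 quickstart, not from this diff:

# Hedged sketch: assemble and launch a Llama3 8B 16k fine-tune without the removed factory.
# run.LocalExecutor / run.run usage follows the NeMo-Run quickstart and is an assumption here.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import llama3_8b, llama3_8b_16k

recipe = llama3_8b.finetune_recipe(name="llama3_8b_16k_finetune", num_nodes=1, num_gpus_per_node=8)
recipe.model = llama3_8b_16k.model()
recipe.trainer = llama3_8b_16k.trainer(num_nodes=1, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

if __name__ == "__main__":
    # Single-node launch via torchrun; a cluster executor would replace this for multi-node runs.
    executor = run.LocalExecutor(ntasks_per_node=8, launcher="torchrun")
    run.run(recipe, executor=executor)
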
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/llama3_8b_64k.py
@@ -129,48 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Llama3 8B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory llama3_8b_64k
$ nemo llm finetune --factory "llama3_8b_64k(num_nodes=2, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="llama3_8b_64k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard 8k version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the extended sequence length.
"""
recipe = llama3_8b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
44 changes: 0 additions & 44 deletions nemo/collections/llm/recipes/mixtral_8x3b_16k.py
@@ -130,47 +130,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 1,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x3B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x3b_16k
$ nemo llm finetune --factory "mixtral_8x3b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x3b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with longer sequences (16k) compared to the standard version.
It uses the SQuAD dataset adapted for 16k sequence length.
"""
recipe = mixtral_8x3b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/mixtral_8x3b_64k.py
@@ -131,48 +131,3 @@ def pretrain_recipe(
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 8,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x3B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x3b_64k
$ nemo llm finetune --factory "mixtral_8x3b_64k(num_nodes=8, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x3b_64k_finetune", num_nodes=8)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the extended sequence length.
"""
recipe = mixtral_8x3b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
43 changes: 0 additions & 43 deletions nemo/collections/llm/recipes/mixtral_8x7b_16k.py
@@ -129,46 +129,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 2,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x7B model with 16k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 16k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x7b_16k
$ nemo llm finetune --factory "mixtral_8x7b_16k(num_nodes=2, name='my_16k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x7b_16k_finetune", num_nodes=2)
>>> print(recipe)
Note:
This recipe uses the SQuAD dataset for fine-tuning.
"""
recipe = mixtral_8x7b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=16384, global_batch_size=512, micro_batch_size=1)

return recipe
45 changes: 0 additions & 45 deletions nemo/collections/llm/recipes/mixtral_8x7b_64k.py
@@ -133,48 +133,3 @@ def pretrain_recipe(
recipe.data = run.Config(MockDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe


@run.cli.factory(target=finetune, name=NAME)
def finetune_recipe(
dir: Optional[str] = None,
name: str = "default",
num_nodes: int = 16,
num_gpus_per_node: int = 8,
) -> run.Partial:
"""
Create a fine-tuning recipe for Mixtral 8x7B model with 64k sequence length.
This function sets up a complete configuration for fine-tuning, including
model, trainer, and data settings optimized for 64k sequence length.
Args:
dir (Optional[str]): Directory for saving logs and checkpoints.
name (str): Name of the fine-tuning run.
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
Returns:
run.Partial: Partial configuration for fine-tuning.
Examples:
CLI usage:
$ nemo llm finetune --factory mixtral_8x7b_64k
$ nemo llm finetune --factory "mixtral_8x7b_64k(num_nodes=16, name='my_64k_finetune')"
Python API usage:
>>> recipe = finetune_recipe(name="mixtral_8x7b_64k_finetune", num_nodes=16)
>>> print(recipe)
Note:
This recipe is optimized for fine-tuning with long sequences (64k) compared to the standard version.
It uses the SQuAD dataset adapted for 64k sequence length. Be aware that this configuration requires
substantial computational resources due to the model size and extended sequence length.
"""
recipe = mixtral_8x7b.finetune_recipe(name=name, dir=dir, num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)

recipe.model = model()
recipe.trainer = trainer(num_nodes=num_nodes, num_gpus_per_node=num_gpus_per_node)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)

return recipe
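
The same override pattern covers the largest removed configuration, Mixtral 8x7B at 64k sequence length, which the docstring above notes requires substantial resources (16 nodes with 8 GPUs each by default). A minimal sketch under the same module-layout assumptions as the earlier examples:

# Hedged sketch: the deleted mixtral_8x7b_64k fine-tuning setup, rebuilt from the base recipe.
# Import paths and the mixtral_8x7b_64k.model()/trainer() helpers are assumptions.
import nemo_run as run
from nemo.collections.llm.gpt.data.squad import SquadDataModule
from nemo.collections.llm.recipes import mixtral_8x7b, mixtral_8x7b_64k

recipe = mixtral_8x7b.finetune_recipe(name="my_64k_finetune", num_nodes=16, num_gpus_per_node=8)
recipe.model = mixtral_8x7b_64k.model()
recipe.trainer = mixtral_8x7b_64k.trainer(num_nodes=16, num_gpus_per_node=8)
recipe.data = run.Config(SquadDataModule, seq_length=65536, global_batch_size=512, micro_batch_size=1)
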
