Commit
add all params to cli
abhishekkrthakur committed Aug 12, 2024
1 parent f7de8cd commit 7587437
Showing 7 changed files with 75 additions and 832 deletions.
103 changes: 11 additions & 92 deletions src/autotrain/cli/run_image_classification.py
@@ -1,7 +1,7 @@
from argparse import ArgumentParser

from autotrain import logger
from autotrain.cli.utils import common_args, img_clf_munge_data
from autotrain.cli.utils import get_field_info, img_clf_munge_data
from autotrain.project import AutoTrainProject
from autotrain.trainers.image_classification.params import ImageClassificationParams

@@ -15,108 +15,27 @@ def run_image_classification_command_factory(args):
class RunAutoTrainImageClassificationCommand(BaseAutoTrainCommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
arg_list = get_field_info(ImageClassificationParams)
arg_list = [
{
"arg": "--image-column",
"help": "Image column to use",
"required": False,
"type": str,
"default": "image",
},
{
"arg": "--target-column",
"help": "Target column to use",
"required": False,
"type": str,
"default": "target",
},
{
"arg": "--warmup-ratio",
"help": "Define the proportion of training to be dedicated to a linear warmup where learning rate gradually increases. This can help in stabilizing the training process early on. Default ratio is 0.1.",
"required": False,
"type": float,
"default": 0.1,
},
{
"arg": "--optimizer",
"help": "Choose the optimizer algorithm for training the model. Different optimizers can affect the training speed and model performance. 'adamw_torch' is used by default.",
"required": False,
"type": str,
"default": "adamw_torch",
},
{
"arg": "--scheduler",
"help": "Select the learning rate scheduler to adjust the learning rate based on the number of epochs. 'linear' decreases the learning rate linearly from the initial lr set. Default is 'linear'. Try 'cosine' for a cosine annealing schedule.",
"required": False,
"type": str,
"default": "linear",
},
{
"arg": "--weight-decay",
"help": "Set the weight decay rate to apply for regularization. Helps in preventing the model from overfitting by penalizing large weights. Default is 0.0, meaning no weight decay is applied.",
"required": False,
"type": float,
"default": 0.0,
},
{
"arg": "--max-grad-norm",
"help": "Specify the maximum norm of the gradients for gradient clipping. Gradient clipping is used to prevent the exploding gradient problem in deep neural networks. Default is 1.0.",
"required": False,
"type": float,
"default": 1.0,
},
{
"arg": "--logging-steps",
"help": "Determine how often to log training progress. Set this to the number of steps between each log output. -1 determines logging steps automatically. Default is -1.",
"required": False,
"type": int,
"default": -1,
},
{
"arg": "--eval-strategy",
"help": "Specify how often to evaluate the model performance. Options include 'no', 'steps', 'epoch'. 'epoch' evaluates at the end of each training epoch by default.",
"required": False,
"type": str,
"default": "epoch",
"choices": ["steps", "epoch", "no"],
},
{
"arg": "--save-total-limit",
"help": "Limit the total number of model checkpoints to save. Helps manage disk space by retaining only the most recent checkpoints. Default is to save only the latest one.",
"required": False,
"type": int,
"default": 1,
},
{
"arg": "--auto-find-batch-size",
"help": "Enable automatic batch size determination based on your hardware capabilities. When set, it tries to find the largest batch size that fits in memory.",
"arg": "--train",
"help": "Command to train the model",
"required": False,
"action": "store_true",
},
{
"arg": "--mixed-precision",
"help": "Choose the precision mode for training to optimize performance and memory usage. Options are 'fp16', 'bf16', or None for default precision. Default is None.",
"arg": "--deploy",
"help": "Command to deploy the model (limited availability)",
"required": False,
"type": str,
"default": None,
"choices": ["fp16", "bf16", None],
},
{
"arg": "--early-stopping-patience",
"help": "Specify the number of epochs with no improvement after which training will stop. Default is 5.",
"required": False,
"type": int,
"default": 5,
"action": "store_true",
},
{
"arg": "--early-stopping-threshold",
"help": "Define the minimum change in the monitored metric to qualify as an improvement. Default is 0.01.",
"arg": "--inference",
"help": "Command to run inference (limited availability)",
"required": False,
"type": float,
"default": 0.01,
"action": "store_true",
},
]
arg_list = common_args() + arg_list
] + arg_list
run_image_classification_parser = parser.add_parser(
"image-classification", description="✨ Run AutoTrain Image Classification"
)
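The change above swaps the hand-written argument dictionaries for metadata derived from ImageClassificationParams via get_field_info, keeping only the --train, --deploy, and --inference flags hardcoded. The helper's implementation is not part of this diff; below is a minimal sketch of what such a helper could look like, assuming the params classes are Pydantic models — the attribute names and dictionary keys are assumptions, not the actual autotrain.cli.utils code.

# Hypothetical sketch of a get_field_info-style helper (not the actual
# autotrain.cli.utils implementation), assuming the params classes are
# Pydantic v2 models.
from typing import Any, Dict, List, Type

from pydantic import BaseModel


def get_field_info(params_class: Type[BaseModel]) -> List[Dict[str, Any]]:
    arg_list = []
    for name, field in params_class.model_fields.items():
        arg_list.append(
            {
                "arg": f"--{name.replace('_', '-')}",  # e.g. lora_r -> --lora-r
                "alias": [f"--{name}"],                # keep the underscore spelling as an alias
                "help": field.description or "",
                "required": field.is_required(),
                "type": field.annotation,
                "default": None if field.is_required() else field.default,
            }
        )
    return arg_list

A helper along these lines is what lets each trainer's CLI stay in sync with its params class instead of duplicating every option by hand, which is why this commit removes far more lines than it adds.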
244 changes: 9 additions & 235 deletions src/autotrain/cli/run_llm.py
@@ -1,7 +1,7 @@
from argparse import ArgumentParser

from autotrain import logger
from autotrain.cli.utils import common_args, llm_munge_data
from autotrain.cli.utils import get_field_info, llm_munge_data
from autotrain.project import AutoTrainProject
from autotrain.trainers.clm.params import LLMTrainingParams

@@ -15,253 +15,27 @@ def run_llm_command_factory(args):
class RunAutoTrainLLMCommand(BaseAutoTrainCommand):
@staticmethod
def register_subcommand(parser: ArgumentParser):
arg_list = get_field_info(LLMTrainingParams)
arg_list = [
{
"arg": "--text_column",
"help": "Specify the dataset column to use for text data. This parameter is essential for models processing textual information. Default is 'text'.",
"required": False,
"type": str,
"default": "text",
"alias": ["--text-column"],
},
{
"arg": "--rejected_text_column",
"help": "Define the column to use for storing rejected text entries, which are typically entries that do not meet certain criteria for processing. Default is 'rejected'. Used only for orpo, dpo and reward trainerss",
"required": False,
"type": str,
"default": "rejected",
"alias": ["--rejected-text-column"],
},
{
"arg": "--prompt_text_column",
"help": "Identify the column that contains prompt text for tasks requiring contextual inputs, such as conversation or completion generation. Default is 'prompt'. Used only for dpo trainer",
"required": False,
"type": str,
"default": "prompt",
"alias": ["--prompt-text-column"],
},
{
"arg": "--model-ref",
"help": "Reference model to use for DPO when not using PEFT",
"required": False,
"type": str,
"alias": ["--model-ref"],
},
{
"arg": "--warmup_ratio",
"help": "Set the proportion of training allocated to warming up the learning rate, which can enhance model stability and performance at the start of training. Default is 0.1",
"required": False,
"type": float,
"default": 0.1,
"alias": ["--warmup-ratio"],
},
{
"arg": "--optimizer",
"help": "Choose the optimizer algorithm for training the model. Different optimizers can affect the training speed and model performance. 'adamw_torch' is used by default.",
"required": False,
"type": str,
"default": "adamw_torch",
},
{
"arg": "--scheduler",
"help": "Select the learning rate scheduler to adjust the learning rate based on the number of epochs. 'linear' decreases the learning rate linearly from the initial lr set. Default is 'linear'. Try 'cosine' for a cosine annealing schedule.",
"required": False,
"type": str,
"default": "linear",
},
{
"arg": "--weight_decay",
"help": "Define the weight decay rate for regularization, which helps prevent overfitting by penalizing larger weights. Default is 0.0",
"required": False,
"type": float,
"default": 0.0,
"alias": ["--weight-decay"],
},
{
"arg": "--max_grad_norm",
"help": "Set the maximum norm for gradient clipping, which is critical for preventing gradients from exploding during backpropagation. Default is 1.0.",
"required": False,
"type": float,
"default": 1.0,
"alias": ["--max-grad-norm"],
},
{
"arg": "--add_eos_token",
"help": "Toggle whether to automatically add an End Of Sentence (EOS) token at the end of texts, which can be critical for certain types of models like language models. Only used for `default` trainer",
"arg": "--train",
"help": "Command to train the model",
"required": False,
"action": "store_true",
"alias": ["--add-eos-token"],
},
{
"arg": "--block_size",
"help": "Specify the block size for processing sequences. This is maximum sequence length or length of one block of text. Setting to -1 determines block size automatically. Default is -1.",
"required": False,
"type": str,
"default": "-1",
"alias": ["--block-size"],
},
{
"arg": "--peft",
"help": "Enable LoRA-PEFT",
"arg": "--deploy",
"help": "Command to deploy the model (limited availability)",
"required": False,
"action": "store_true",
"alias": ["--use-peft"],
},
{
"arg": "--lora_r",
"help": "Set the 'r' parameter for Low-Rank Adaptation (LoRA). Default is 16.",
"required": False,
"type": int,
"default": 16,
"alias": ["--lora-r"],
},
{
"arg": "--lora_alpha",
"help": "Specify the 'alpha' parameter for LoRA. Default is 32.",
"required": False,
"type": int,
"default": 32,
"alias": ["--lora-alpha"],
},
{
"arg": "--lora_dropout",
"help": "Set the dropout rate within the LoRA layers to help prevent overfitting during adaptation. Default is 0.05.",
"required": False,
"type": float,
"default": 0.05,
"alias": ["--lora-dropout"],
},
{
"arg": "--logging_steps",
"help": "Determine how often to log training progress in terms of steps. Setting it to '-1' determines logging steps automatically.",
"required": False,
"type": int,
"default": -1,
"alias": ["--logging-steps"],
},
{
"arg": "--eval_strategy",
"help": "Choose how frequently to evaluate the model's performance, with 'epoch' as the default, meaning at the end of each training epoch",
"required": False,
"type": str,
"default": "epoch",
"alias": ["--eval-strategy"],
"choices": ["epoch", "steps", "no"],
},
{
"arg": "--save_total_limit",
"help": "Limit the total number of saved model checkpoints to manage disk usage effectively. Default is to save only the latest checkpoint",
"required": False,
"type": int,
"default": 1,
"alias": ["--save-total-limit"],
},
{
"arg": "--auto_find_batch_size",
"help": "Automatically determine the optimal batch size based on system capabilities to maximize efficiency.",
"arg": "--inference",
"help": "Command to run inference (limited availability)",
"required": False,
"action": "store_true",
"alias": ["--auto-find-batch-size"],
},
{
"arg": "--mixed_precision",
"help": "Choose the precision mode for training to optimize performance and memory usage. Options are 'fp16', 'bf16', or None for default precision. Default is None.",
"required": False,
"type": str,
"default": None,
"choices": ["fp16", "bf16", None],
"alias": ["--mixed-precision"],
},
{
"arg": "--quantization",
"help": "Choose the quantization level to reduce model size and potentially increase inference speed. Options include 'int4', 'int8', or None. Enabling requires --peft",
"required": False,
"type": str,
"default": None,
"alias": ["--quantization"],
"choices": ["int4", "int8", None],
},
{
"arg": "--model_max_length",
"help": "Set the maximum length for the model to process in a single batch, which can affect both performance and memory usage. Default is 1024",
"required": False,
"type": int,
"default": 1024,
"alias": ["--model-max-length"],
},
{
"arg": "--max_prompt_length",
"help": "Specify the maximum length for prompts used in training, particularly relevant for tasks requiring initial contextual input. Used only for `orpo` trainer.",
"required": False,
"type": int,
"default": 128,
"alias": ["--max-prompt-length"],
},
{
"arg": "--max_completion_length",
"help": "Completion length to use, for orpo: encoder-decoder models only",
"required": False,
"type": int,
"default": None,
"alias": ["--max-completion-length"],
},
{
"arg": "--trainer",
"help": "Trainer type to use",
"required": False,
"type": str,
"default": "default",
"choices": ["default", "dpo", "sft", "orpo", "reward"],
},
{
"arg": "--target_modules",
"help": "Identify specific modules within the model architecture to target with adaptations or optimizations, such as LoRA. Comma separated list of module names. Default is 'all-linear'.",
"required": False,
"type": str,
"default": "all-linear",
"alias": ["--target-modules"],
},
{
"arg": "--merge_adapter",
"help": "Use this flag to merge PEFT adapter with the model",
"required": False,
"action": "store_true",
"alias": ["--merge-adapter"],
},
{
"arg": "--use_flash_attention_2",
"help": "Use flash attention 2",
"required": False,
"action": "store_true",
"alias": ["--use-flash-attention-2", "--use-fa2"],
},
{
"arg": "--dpo-beta",
"help": "Beta for DPO trainer",
"required": False,
"type": float,
"default": 0.1,
"alias": ["--dpo-beta"],
},
{
"arg": "--chat_template",
"help": "Apply a specific template for chat-based interactions, with options including 'tokenizer', 'chatml', 'zephyr', or None. This setting can shape the model's conversational behavior. ",
"required": False,
"default": None,
"alias": ["--chat-template"],
"choices": ["tokenizer", "chatml", "zephyr", None],
},
{
"arg": "--padding",
"help": "Specify the padding direction for sequences, critical for models sensitive to input alignment. Options include 'left', 'right', or None",
"required": False,
"type": str,
"default": None,
"alias": ["--padding"],
"choices": ["left", "right", None],
},
]
arg_list = common_args() + arg_list
] + arg_list
run_llm_parser = parser.add_parser("llm", description="✨ Run AutoTrain LLM")
for arg in arg_list:
names = [arg["arg"]] + arg.get("alias", [])
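The loop that starts at the end of the visible diff turns each arg_list entry into an argparse argument on the llm subparser. A sketch of how that registration could proceed is shown below; the keyword handling is an assumption based on the dictionary keys used in this diff ("arg", "alias", "help", "required", "type", "default", "choices", "action"), not necessarily the exact AutoTrain implementation.

# Sketch of the registration loop hinted at above (keyword handling is an
# assumption based on the dictionary keys visible in this diff).
for arg in arg_list:
    names = [arg["arg"]] + arg.get("alias", [])
    kwargs = {"help": arg.get("help"), "required": arg.get("required", False)}
    if arg.get("action"):
        # boolean flags such as --train, --deploy, --inference
        kwargs["action"] = arg["action"]
    else:
        kwargs["type"] = arg.get("type", str)
        kwargs["default"] = arg.get("default")
        if arg.get("choices"):
            kwargs["choices"] = arg["choices"]
    run_llm_parser.add_argument(*names, **kwargs)

With the field info generated from LLMTrainingParams, both the long-form (--lora-r) and underscore (--lora_r) spellings can still be accepted by listing the latter as an alias, matching the pattern the deleted dictionaries used.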