diff --git a/docs/source/llm_finetuning.mdx b/docs/source/llm_finetuning.mdx
index 7c1268298e..3597dba7dd 100644
--- a/docs/source/llm_finetuning.mdx
+++ b/docs/source/llm_finetuning.mdx
@@ -75,7 +75,7 @@ usage: autotrain <command> [<args>] llm [-h] [--train] [--deploy] [--inference]
                             [--weight_decay WEIGHT_DECAY] [--max_grad_norm MAX_GRAD_NORM] [--add_eos_token]
                             [--block_size BLOCK_SIZE] [--peft] [--lora_r LORA_R] [--lora_alpha LORA_ALPHA]
                             [--lora_dropout LORA_DROPOUT] [--logging_steps LOGGING_STEPS] [--evaluation_strategy {epoch,steps,no}]
-                            [--save_total_limit SAVE_TOTAL_LIMIT] [--save_strategy {epoch,steps}] [--auto_find_batch_size]
+                            [--save_total_limit SAVE_TOTAL_LIMIT] [--auto_find_batch_size]
                             [--mixed_precision {fp16,bf16,None}] [--quantization {int4,int8,None}] [--model_max_length MODEL_MAX_LENGTH]
                             [--max_prompt_length MAX_PROMPT_LENGTH] [--max_completion_length MAX_COMPLETION_LENGTH]
                             [--trainer {default,dpo,sft,orpo,reward}] [--target_modules TARGET_MODULES] [--merge_adapter]
@@ -162,8 +162,6 @@ options:
   --save_total_limit SAVE_TOTAL_LIMIT, --save-total-limit SAVE_TOTAL_LIMIT
                         Limit the total number of saved model checkpoints to manage disk usage effectively. Default is to save only
                         the latest checkpoint
-  --save_strategy {epoch,steps}, --save-strategy {epoch,steps}
-                        Define the checkpoint saving strategy, with 'epoch' as the default, saving checkpoints at the end of each training epoch.
   --auto_find_batch_size, --auto-find-batch-size
                         Automatically determine the optimal batch size based on system capabilities to maximize efficiency.
   --mixed_precision {fp16,bf16,None}, --mixed-precision {fp16,bf16,None}
diff --git a/docs/source/seq2seq.mdx b/docs/source/seq2seq.mdx
index 959b1d894f..6daf43999c 100644
--- a/docs/source/seq2seq.mdx
+++ b/docs/source/seq2seq.mdx
@@ -45,7 +45,7 @@ usage: autotrain <command> [<args>] seq2seq [-h] [--train] [--deploy] [--inferen
                                 [--optimizer OPTIMIZER] [--scheduler SCHEDULER] [--weight-decay WEIGHT_DECAY]
                                 [--max-grad-norm MAX_GRAD_NORM] [--logging-steps LOGGING_STEPS]
                                 [--evaluation-strategy EVALUATION_STRATEGY] [--save-total-limit SAVE_TOTAL_LIMIT]
-                                [--save-strategy SAVE_STRATEGY] [--auto-find-batch-size] [--mixed-precision {fp16,bf16,None}] [--peft]
+                                [--auto-find-batch-size] [--mixed-precision {fp16,bf16,None}] [--peft]
                                 [--quantization {int8,None}] [--lora-r LORA_R] [--lora-alpha LORA_ALPHA]
                                 [--lora-dropout LORA_DROPOUT] [--target-modules TARGET_MODULES]
@@ -118,9 +118,6 @@ options:
   --save-total-limit SAVE_TOTAL_LIMIT
                         Limit the total number of model checkpoints to save. Helps manage disk space by retaining only the most recent
                         checkpoints. Default is to save only the latest one.
-  --save-strategy SAVE_STRATEGY
-                        Determine the strategy for saving model checkpoints. Possible values are 'no', 'steps', 'epoch'. 'epoch' saves a checkpoint
-                        at the end of each epoch by default.
   --auto-find-batch-size
                         Enable automatic batch size determination based on your hardware capabilities. When set, it tries to find the largest
                         batch size that fits in memory.
diff --git a/src/autotrain/__init__.py b/src/autotrain/__init__.py
index 8b145ec3f6..4da5a0bf93 100644
--- a/src/autotrain/__init__.py
+++ b/src/autotrain/__init__.py
@@ -41,4 +41,4 @@
 logger = Logger().get_logger()


-__version__ = "0.7.71.dev0"
+__version__ = "0.7.72.dev0"
diff --git a/src/autotrain/app.py b/src/autotrain/app.py
index 69ad98a789..168e53cfd5 100644
--- a/src/autotrain/app.py
+++ b/src/autotrain/app.py
@@ -96,7 +96,6 @@
     epochs=3,
     padding="right",
     chat_template="none",
-    save_strategy="no",
     max_completion_length=128,
 ).model_dump()

@@ -111,7 +110,6 @@
 PARAMS["seq2seq"] = Seq2SeqParams(
     mixed_precision="fp16",
     target_modules="all-linear",
-    save_strategy="no",
     log="tensorboard",
 ).model_dump()
 PARAMS["tabular"] = TabularParams(
@@ -257,7 +255,6 @@ async def fetch_params(task: str, param_type: str):
             "logging_steps",
             "evaluation_strategy",
             "save_total_limit",
-            "save_strategy",
             "auto_find_batch_size",
             "warmup_ratio",
             "weight_decay",
@@ -281,7 +278,6 @@ async def fetch_params(task: str, param_type: str):
             "logging_steps",
             "auto_find_batch_size",
             "save_total_limit",
-            "save_strategy",
             "evaluation_strategy",
         ]
         task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params}
@@ -294,7 +290,6 @@ async def fetch_params(task: str, param_type: str):
             "logging_steps",
             "auto_find_batch_size",
             "save_total_limit",
-            "save_strategy",
             "evaluation_strategy",
         ]
         task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params}
@@ -307,7 +302,6 @@ async def fetch_params(task: str, param_type: str):
             "logging_steps",
             "auto_find_batch_size",
             "save_total_limit",
-            "save_strategy",
             "evaluation_strategy",
             "quantization",
             "lora_r",
@@ -325,7 +319,6 @@ async def fetch_params(task: str, param_type: str):
             "logging_steps",
             "auto_find_batch_size",
             "save_total_limit",
-            "save_strategy",
             "evaluation_strategy",
         ]
         task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params}
diff --git a/src/autotrain/cli/run_llm.py b/src/autotrain/cli/run_llm.py
index 14a2699b52..aebdc8f606 100644
--- a/src/autotrain/cli/run_llm.py
+++ b/src/autotrain/cli/run_llm.py
@@ -155,15 +155,6 @@ def register_subcommand(parser: ArgumentParser):
                 "default": 1,
                 "alias": ["--save-total-limit"],
             },
-            {
-                "arg": "--save_strategy",
-                "help": "Define the checkpoint saving strategy, with 'epoch' as the default, saving checkpoints at the end of each training epoch.",
-                "required": False,
-                "type": str,
-                "default": "epoch",
-                "alias": ["--save-strategy"],
-                "choices": ["epoch", "steps"],
-            },
             {
                 "arg": "--auto_find_batch_size",
                 "help": "Automatically determine the optimal batch size based on system capabilities to maximize efficiency.",
diff --git a/src/autotrain/cli/run_seq2seq.py b/src/autotrain/cli/run_seq2seq.py
index f15e60ea14..7528b2d22a 100644
--- a/src/autotrain/cli/run_seq2seq.py
+++ b/src/autotrain/cli/run_seq2seq.py
@@ -207,11 +207,6 @@ def __init__(self, args):
         else:
             raise ValueError("Must specify --train, --deploy or --inference")

-        if len(self.args.target_modules.strip()) == 0:
-            self.args.target_modules = []
-        else:
-            self.args.target_modules = self.args.target_modules.split(",")
-
     def run(self):
         logger.info("Running Seq2Seq Classification")
         if self.args.train:
diff --git a/src/autotrain/trainers/clm/__main__.py b/src/autotrain/trainers/clm/__main__.py
index 5371ea9574..09ecd7b7a7 100644
--- a/src/autotrain/trainers/clm/__main__.py
+++ b/src/autotrain/trainers/clm/__main__.py
@@ -228,7 +228,7 @@ def train(config):
         evaluation_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         logging_steps=logging_steps,
         save_total_limit=config.save_total_limit,
-        save_strategy=config.save_strategy,
+        save_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         gradient_accumulation_steps=config.gradient_accumulation,
         report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
diff --git a/src/autotrain/trainers/clm/params.py b/src/autotrain/trainers/clm/params.py
index 5f0a16d462..b34ac591d7 100644
--- a/src/autotrain/trainers/clm/params.py
+++ b/src/autotrain/trainers/clm/params.py
@@ -26,7 +26,6 @@ class LLMTrainingParams(AutoTrainParams):
     logging_steps: int = Field(-1, title="Logging steps")
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
     save_total_limit: int = Field(1, title="Save total limit")
-    save_strategy: str = Field("no", title="Save strategy")
     auto_find_batch_size: bool = Field(False, title="Auto find batch size")
     mixed_precision: Optional[str] = Field(None, title="fp16, bf16, or None")
     lr: float = Field(3e-5, title="Learning rate")
diff --git a/src/autotrain/trainers/image_classification/__main__.py b/src/autotrain/trainers/image_classification/__main__.py
index 53c16cff85..c4654b3bc3 100644
--- a/src/autotrain/trainers/image_classification/__main__.py
+++ b/src/autotrain/trainers/image_classification/__main__.py
@@ -129,7 +129,7 @@ def train(config):
         evaluation_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         logging_steps=logging_steps,
         save_total_limit=config.save_total_limit,
-        save_strategy=config.save_strategy,
+        save_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         gradient_accumulation_steps=config.gradient_accumulation,
         report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
diff --git a/src/autotrain/trainers/image_classification/params.py b/src/autotrain/trainers/image_classification/params.py
index 67b17cda12..ec8e94e6f4 100644
--- a/src/autotrain/trainers/image_classification/params.py
+++ b/src/autotrain/trainers/image_classification/params.py
@@ -26,7 +26,6 @@ class ImageClassificationParams(AutoTrainParams):
     auto_find_batch_size: bool = Field(False, title="Auto find batch size")
     mixed_precision: Optional[str] = Field(None, title="fp16, bf16, or None")
     save_total_limit: int = Field(1, title="Save total limit")
-    save_strategy: str = Field("epoch", title="Save strategy")
     token: Optional[str] = Field(None, title="Hub Token")
     push_to_hub: bool = Field(False, title="Push to hub")
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
diff --git a/src/autotrain/trainers/seq2seq/__main__.py b/src/autotrain/trainers/seq2seq/__main__.py
index ee57ef4c8d..147173183c 100644
--- a/src/autotrain/trainers/seq2seq/__main__.py
+++ b/src/autotrain/trainers/seq2seq/__main__.py
@@ -102,7 +102,7 @@ def train(config):
         evaluation_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         logging_steps=logging_steps,
         save_total_limit=config.save_total_limit,
-        save_strategy=config.save_strategy,
+        save_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         gradient_accumulation_steps=config.gradient_accumulation,
         report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
@@ -179,6 +179,8 @@ def train(config):
         target_modules = config.target_modules.split(",") if config.target_modules is not None else None
         if target_modules:
             target_modules = [module.strip() for module in target_modules]
+            if len(target_modules) == 1 and target_modules[0] == "all-linear":
+                target_modules = "all-linear"
         lora_config = LoraConfig(
             r=config.lora_r,
             lora_alpha=config.lora_alpha,
diff --git a/src/autotrain/trainers/seq2seq/params.py b/src/autotrain/trainers/seq2seq/params.py
index ca925010c8..a5844e9c87 100644
--- a/src/autotrain/trainers/seq2seq/params.py
+++ b/src/autotrain/trainers/seq2seq/params.py
@@ -33,7 +33,6 @@ class Seq2SeqParams(AutoTrainParams):
     auto_find_batch_size: bool = Field(False, title="Auto find batch size")
     mixed_precision: Optional[str] = Field(None, title="fp16, bf16, or None")
     save_total_limit: int = Field(1, title="Save total limit")
-    save_strategy: str = Field("no", title="Save strategy")
     token: Optional[str] = Field(None, title="Hub Token")
     push_to_hub: bool = Field(False, title="Push to hub")
     peft: bool = Field(False, title="Use PEFT")
diff --git a/src/autotrain/trainers/text_classification/__main__.py b/src/autotrain/trainers/text_classification/__main__.py
index 31e5720324..a6cef37aee 100644
--- a/src/autotrain/trainers/text_classification/__main__.py
+++ b/src/autotrain/trainers/text_classification/__main__.py
@@ -133,7 +133,7 @@ def train(config):
         evaluation_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         logging_steps=logging_steps,
         save_total_limit=config.save_total_limit,
-        save_strategy=config.save_strategy,
+        save_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         gradient_accumulation_steps=config.gradient_accumulation,
         report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
diff --git a/src/autotrain/trainers/text_classification/params.py b/src/autotrain/trainers/text_classification/params.py
index 4791e42343..f9feba7bd3 100644
--- a/src/autotrain/trainers/text_classification/params.py
+++ b/src/autotrain/trainers/text_classification/params.py
@@ -28,7 +28,6 @@ class TextClassificationParams(AutoTrainParams):
     auto_find_batch_size: bool = Field(False, title="Auto find batch size")
     mixed_precision: Optional[str] = Field(None, title="fp16, bf16, or None")
     save_total_limit: int = Field(1, title="Save total limit")
-    save_strategy: str = Field("epoch", title="Save strategy")
     token: Optional[str] = Field(None, title="Hub Token")
     push_to_hub: bool = Field(False, title="Push to hub")
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
diff --git a/src/autotrain/trainers/token_classification/__main__.py b/src/autotrain/trainers/token_classification/__main__.py
index 9d548f8d2a..d161cae3b2 100644
--- a/src/autotrain/trainers/token_classification/__main__.py
+++ b/src/autotrain/trainers/token_classification/__main__.py
@@ -131,7 +131,7 @@ def train(config):
         evaluation_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         logging_steps=logging_steps,
         save_total_limit=config.save_total_limit,
-        save_strategy=config.save_strategy,
+        save_strategy=config.evaluation_strategy if config.valid_split is not None else "no",
         gradient_accumulation_steps=config.gradient_accumulation,
         report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
diff --git a/src/autotrain/trainers/token_classification/params.py b/src/autotrain/trainers/token_classification/params.py
index 4383d95d85..ec29f87cef 100644
--- a/src/autotrain/trainers/token_classification/params.py
+++ b/src/autotrain/trainers/token_classification/params.py
@@ -28,7 +28,6 @@ class TokenClassificationParams(AutoTrainParams):
     auto_find_batch_size: bool = Field(False, title="Auto find batch size")
     mixed_precision: Optional[str] = Field(None, title="fp16, bf16, or None")
     save_total_limit: int = Field(1, title="Save total limit")
-    save_strategy: str = Field("epoch", title="Save strategy")
     token: Optional[str] = Field(None, title="Hub Token")
     push_to_hub: bool = Field(False, title="Push to hub")
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
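
Note on the trainer hunks above: with the --save_strategy flag gone, each trainer now passes the
evaluation strategy through as the save strategy and turns both off when no validation split is
configured. The snippet below is a minimal, standalone sketch of that expression; TrainingConfig
and resolve_strategies are illustrative names, not part of the patch or of the transformers API.

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class TrainingConfig:
        # Only the two fields read by the patched expression are modeled here.
        evaluation_strategy: str = "epoch"  # "epoch", "steps", or "no"
        valid_split: Optional[str] = None   # name of the validation split, if any

    def resolve_strategies(config: TrainingConfig) -> dict:
        """Evaluate and save on the same schedule; disable both without a validation split."""
        strategy = config.evaluation_strategy if config.valid_split is not None else "no"
        return {"evaluation_strategy": strategy, "save_strategy": strategy}

    print(resolve_strategies(TrainingConfig(valid_split="validation")))
    # {'evaluation_strategy': 'epoch', 'save_strategy': 'epoch'}
    print(resolve_strategies(TrainingConfig()))
    # {'evaluation_strategy': 'no', 'save_strategy': 'no'}

save_total_limit is unchanged, so whenever saving is active at most that many checkpoints are kept.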
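
The seq2seq PEFT hunk also normalizes target_modules: peft's LoraConfig treats the plain string
"all-linear" as a shortcut for targeting every linear layer, whereas a one-element list
["all-linear"] would be matched as an ordinary module name, which is why the patch collapses the
list back to a string before building LoraConfig. A rough sketch of that normalization, with
normalize_target_modules as a hypothetical helper name:

    from typing import List, Optional, Union

    def normalize_target_modules(raw: Optional[str]) -> Union[str, List[str], None]:
        # Split the comma-separated CLI value, then collapse the special case back
        # to the string form expected for the "all-linear" shortcut.
        if raw is None:
            return None
        modules = [m.strip() for m in raw.split(",") if m.strip()]
        if modules == ["all-linear"]:
            return "all-linear"
        return modules or None

    print(normalize_target_modules("all-linear"))  # 'all-linear'
    print(normalize_target_modules("q, v"))        # ['q', 'v']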