diff --git a/src/autotrain/cli/run_image_classification.py b/src/autotrain/cli/run_image_classification.py
index 31e387efa0..eccb9df972 100644
--- a/src/autotrain/cli/run_image_classification.py
+++ b/src/autotrain/cli/run_image_classification.py
@@ -204,10 +204,11 @@ def register_subcommand(parser: ArgumentParser):
                 "type": str,
             },
             {
-                "arg": "--log-to-wandb",
-                "help": "Use Weights & Biases tracking",
+                "arg": "--log",
+                "help": "Use experiment tracking",
                 "required": False,
-                "action": "store_true",
+                "type": str,
+                "default": "none",
             },
         ]
         run_text_classification_parser = parser.add_parser(
@@ -242,7 +243,6 @@ def __init__(self, args):
             "auto_find_batch_size",
             "fp16",
             "push_to_hub",
-            "log_to_wandb"
         ]
         for arg_name in store_true_arg_names:
             if getattr(self.args, arg_name) is None:
@@ -298,7 +298,7 @@ def run(self):
             fp16=self.args.fp16,
             push_to_hub=self.args.push_to_hub,
             repo_id=self.args.repo_id,
-            log_to_wandb=self.args.log_to_wandb,
+            log=self.args.log,
         )
         params.save(output_dir=self.args.project_name)
         if self.num_gpus == 1:
diff --git a/src/autotrain/cli/run_llm.py b/src/autotrain/cli/run_llm.py
index fbe49f6356..58468fed1d 100644
--- a/src/autotrain/cli/run_llm.py
+++ b/src/autotrain/cli/run_llm.py
@@ -341,12 +341,13 @@ def register_subcommand(parser: ArgumentParser):
                 "alias": ["--use-flash-attention-2", "--use-fa2"],
             },
             {
-                "arg": "--log_to_wandb",
-                "help": "Use Weights & Biases tracking",
+                "arg": "--log",
+                "help": "Use experiment tracking",
                 "required": False,
-                "action": "store_true",
-                "alias": ["--log-to-wandb"],
+                "type": str,
+                "default": "none",
             },
+            {
                 "arg": "--disable_gradient_checkpointing",
                 "help": "Disable gradient checkpointing",
                 "required": False,
@@ -393,7 +394,6 @@ def __init__(self, args):
             "use_int4",
             "merge_adapter",
             "use_flash_attention_2",
-            "log_to_wandb",
             "disable_gradient_checkpointing",
         ]
         for arg_name in store_true_arg_names:
@@ -495,7 +495,7 @@ def run(self):
             merge_adapter=self.args.merge_adapter,
             username=self.args.username,
             use_flash_attention_2=self.args.use_flash_attention_2,
-            log_to_wandb=self.args.log_to_wandb,
+            log=self.args.log,
             rejected_text_column=self.args.rejected_text_column,
             disable_gradient_checkpointing=self.args.disable_gradient_checkpointing,
         )
diff --git a/src/autotrain/cli/run_text_classification.py b/src/autotrain/cli/run_text_classification.py
index d69f876687..4d8080ec03 100644
--- a/src/autotrain/cli/run_text_classification.py
+++ b/src/autotrain/cli/run_text_classification.py
@@ -233,10 +233,11 @@ def register_subcommand(parser: ArgumentParser):
                 "type": str,
             },
             {
-                "arg": "--log-to-wandb",
-                "help": "Use Weights & Biases tracking",
+                "arg": "--log",
+                "help": "Use experiment tracking",
                 "required": False,
-                "action": "store_true",
+                "type": str,
+                "default": "none",
             },
         ]
         run_text_classification_parser = parser.add_parser(
@@ -271,7 +272,6 @@ def __init__(self, args):
             "auto_find_batch_size",
             "fp16",
             "push_to_hub",
-            "log_to_wandb",
         ]
         for arg_name in store_true_arg_names:
             if getattr(self.args, arg_name) is None:
@@ -333,7 +333,7 @@ def run(self):
             repo_id=self.args.repo_id,
             token=self.args.token,
             username=self.args.username,
-            log_to_wandb=self.args.log_to_wandb,
+            log=self.args.log,
         )

         if self.args.backend.startswith("spaces"):
diff --git a/src/autotrain/project.py b/src/autotrain/project.py
index ecb11a7e22..efa6745d4b 100644
--- a/src/autotrain/project.py
+++ b/src/autotrain/project.py
@@ -81,7 +81,6 @@ def _munge_common_params(self, job_idx):
         _params["repo_id"] = f"{self.username}/{self.project_name}-{job_idx}"
         _params["data_path"] = self.data_path
         _params["username"] = self.username
-        _params["log_to_wandb"] = True
         return _params

     def _munge_params_llm(self, job_idx):
diff --git a/src/autotrain/trainers/clm/__main__.py b/src/autotrain/trainers/clm/__main__.py
index 9fda3e052a..1991c2adc9 100644
--- a/src/autotrain/trainers/clm/__main__.py
+++ b/src/autotrain/trainers/clm/__main__.py
@@ -305,10 +305,6 @@ def train(config):
     else:
         logging_steps = config.logging_steps

-    report_to = "tensorboard"
-    if config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir=config.project_name,
         per_device_train_batch_size=config.batch_size,
@@ -320,7 +316,7 @@ def train(config):
         save_total_limit=config.save_total_limit,
         save_strategy=config.save_strategy,
         gradient_accumulation_steps=config.gradient_accumulation,
-        report_to=report_to,
+        report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
         lr_scheduler_type=config.scheduler,
         optim=config.optimizer,
diff --git a/src/autotrain/trainers/clm/params.py b/src/autotrain/trainers/clm/params.py
index 952537ae26..03cd37782c 100644
--- a/src/autotrain/trainers/clm/params.py
+++ b/src/autotrain/trainers/clm/params.py
@@ -46,7 +46,7 @@ class LLMTrainingParams(BaseModel):
     merge_adapter: bool = Field(False, title="Merge adapter")
     username: str = Field(None, title="Hugging Face Username")
     use_flash_attention_2: bool = Field(False, title="Use flash attention 2")
-    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")
+    log: str = Field("none", title="Logging using experiment tracking")
     disable_gradient_checkpointing: bool = Field(False, title="Gradient checkpointing")

     def save(self, output_dir):
diff --git a/src/autotrain/trainers/image_classification.py b/src/autotrain/trainers/image_classification.py
index 9186e78fc3..3eb07ad0d1 100644
--- a/src/autotrain/trainers/image_classification.py
+++ b/src/autotrain/trainers/image_classification.py
@@ -234,10 +234,6 @@ def train(co2_tracker, payload, huggingface_token, model_path):
     if device == "cpu":
         fp16 = False

-    report_to = "none"
-    if job_config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir=model_path,
         per_device_train_batch_size=job_config.train_batch_size,
@@ -252,7 +248,7 @@ def train(co2_tracker, payload, huggingface_token, model_path):
         save_strategy="epoch",
         disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
         gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to=report_to,
+        report_to=job_config.log,
         auto_find_batch_size=True,
         lr_scheduler_type=job_config.scheduler,
         optim=job_config.optimizer,
diff --git a/src/autotrain/trainers/image_classification/__main__.py b/src/autotrain/trainers/image_classification/__main__.py
index d9bb1f282d..b06977d3d2 100644
--- a/src/autotrain/trainers/image_classification/__main__.py
+++ b/src/autotrain/trainers/image_classification/__main__.py
@@ -99,10 +99,6 @@ def train(config):
     else:
         logging_steps = config.logging_steps

-    report_to = "tensorboard"
-    if config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir=config.project_name,
         per_device_train_batch_size=config.batch_size,
@@ -115,7 +111,7 @@ def train(config):
         save_total_limit=config.save_total_limit,
         save_strategy=config.save_strategy,
         gradient_accumulation_steps=config.gradient_accumulation,
-        report_to=report_to,
+        report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
         lr_scheduler_type=config.scheduler,
         optim=config.optimizer,
diff --git a/src/autotrain/trainers/image_classification/params.py b/src/autotrain/trainers/image_classification/params.py
index 1e7bdcadd0..19b77e4635 100644
--- a/src/autotrain/trainers/image_classification/params.py
+++ b/src/autotrain/trainers/image_classification/params.py
@@ -30,7 +30,7 @@ class ImageClassificationParams(BaseModel):
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
     image_column: str = Field("image", title="Image column")
     target_column: str = Field("target", title="Target column")
-    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")
+    log: str = Field("none", title="Logging using experiment tracking")

     def __str__(self):
         data = self.dict()
diff --git a/src/autotrain/trainers/lm_trainer.py b/src/autotrain/trainers/lm_trainer.py
index 65ca35afcf..4567d154b9 100644
--- a/src/autotrain/trainers/lm_trainer.py
+++ b/src/autotrain/trainers/lm_trainer.py
@@ -388,10 +388,6 @@ def group_texts(examples):
     if logging_steps == 0:
         logging_steps = 1

-    report_to = "none"
-    if job_config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir=model_path,
         per_device_train_batch_size=job_config.train_batch_size,
@@ -404,7 +400,7 @@ def group_texts(examples):
         save_strategy="epoch",
         disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
         gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to=report_to,
+        report_to=job_config.log,
         auto_find_batch_size=True,
         lr_scheduler_type=job_config.scheduler,
         optim=job_config.optimizer,
diff --git a/src/autotrain/trainers/text_classification.py b/src/autotrain/trainers/text_classification.py
index 7185525900..e2ca3a5352 100644
--- a/src/autotrain/trainers/text_classification.py
+++ b/src/autotrain/trainers/text_classification.py
@@ -205,10 +205,6 @@ def train(co2_tracker, payload, huggingface_token, model_path):
     if model_config.model_type in FP32_MODELS or device == "cpu":
         fp16 = False

-    report_to = "none"
-    if job_config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir="/tmp/autotrain",
         per_device_train_batch_size=job_config.train_batch_size,
@@ -223,7 +219,7 @@ def train(co2_tracker, payload, huggingface_token, model_path):
         save_strategy="epoch",
         disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
         gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to=report_to,
+        report_to=job_config.log,
         auto_find_batch_size=True,
         lr_scheduler_type=job_config.scheduler,
         optim=job_config.optimizer,
diff --git a/src/autotrain/trainers/text_classification/__main__.py b/src/autotrain/trainers/text_classification/__main__.py
index 6584f21a75..538c419012 100644
--- a/src/autotrain/trainers/text_classification/__main__.py
+++ b/src/autotrain/trainers/text_classification/__main__.py
@@ -114,10 +114,6 @@ def train(config):
     else:
         logging_steps = config.logging_steps

-    report_to = "tensorboard"
-    if config.log_to_wandb:
-        report_to = "wandb"
-
     training_args = dict(
         output_dir=config.project_name,
         per_device_train_batch_size=config.batch_size,
@@ -130,7 +126,7 @@ def train(config):
         save_total_limit=config.save_total_limit,
         save_strategy=config.save_strategy,
         gradient_accumulation_steps=config.gradient_accumulation,
-        report_to=report_to,
+        report_to=config.log,
         auto_find_batch_size=config.auto_find_batch_size,
         lr_scheduler_type=config.scheduler,
         optim=config.optimizer,
diff --git a/src/autotrain/trainers/text_classification/params.py b/src/autotrain/trainers/text_classification/params.py
index 3ee6c633af..83b26e150b 100644
--- a/src/autotrain/trainers/text_classification/params.py
+++ b/src/autotrain/trainers/text_classification/params.py
@@ -34,7 +34,7 @@ class TextClassificationParams(BaseModel):
     repo_id: str = Field(None, title="Repo id")
     evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
     username: str = Field(None, title="Hugging Face Username")
-    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")
+    log: str = Field("none", title="Logging using experiment tracking")

     def __str__(self):
         data = self.dict()
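Note on the new `log` value (context, not part of the patch): the trainers above now forward it unchanged as `report_to` when building `TrainingArguments`, so the default string "none" disables reporting and any integration name `transformers` recognizes (e.g. "wandb", "tensorboard") selects that tracker, provided the package is installed. A minimal sketch of that pass-through, with a placeholder output directory:

```python
from transformers import TrainingArguments

# Value that would arrive via --log on the CLI or via the *Params `log` field.
log = "wandb"  # "none" disables all integrations

# The patch simply hands the string to report_to instead of deriving it
# from the old log_to_wandb boolean.
args = TrainingArguments(output_dir="tmp-output", report_to=log)

print(args.report_to)  # transformers normalizes a plain string to a list, e.g. ['wandb']
```

On the command line this presumably becomes, e.g., `autotrain llm ... --log wandb` in place of the removed `--log-to-wandb` flag.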