Merge pull request #2 from soumik12345/wandb-dev
Adds Weights & Biases (wandb) integration to the text classification, image classification, and LM fine-tuning trainers.
soumik12345 authored Oct 10, 2023
2 parents 1931728 + 7cfb7cb commit ec65074
Showing 13 changed files with 59 additions and 6 deletions.
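Every trainer touched in this commit follows the same pattern: a new boolean log_to_wandb parameter decides which report_to backend is handed to transformers' TrainingArguments. A minimal sketch of that pattern, assuming the Hugging Face transformers API and a config object shaped like the ones in this diff (build_training_args is an illustrative helper, not part of the codebase):

from transformers import TrainingArguments

def build_training_args(config):
    # Mirror of the logic added in each trainer: keep the trainer's default
    # backend ("tensorboard" or "none"), switch to wandb when requested.
    report_to = "tensorboard"
    if config.log_to_wandb:
        report_to = "wandb"
    return TrainingArguments(
        output_dir=config.project_name,
        report_to=report_to,  # transformers routes metrics to the named integration
    )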
8 changes: 8 additions & 0 deletions src/autotrain/cli/run_image_classification.py
@@ -203,6 +203,12 @@ def register_subcommand(parser: ArgumentParser):
                "required": False,
                "type": str,
            },
+            {
+                "arg": "--log-to-wandb",
+                "help": "Use Weights & Biases tracking",
+                "required": False,
+                "action": "store_true",
+            },
        ]
        run_text_classification_parser = parser.add_parser(
            "image-classification", description="✨ Run AutoTrain Image Classification"
@@ -236,6 +242,7 @@ def __init__(self, args):
            "auto_find_batch_size",
            "fp16",
            "push_to_hub",
+            "log_to_wandb"
        ]
        for arg_name in store_true_arg_names:
            if getattr(self.args, arg_name) is None:
@@ -291,6 +298,7 @@ def run(self):
            fp16=self.args.fp16,
            push_to_hub=self.args.push_to_hub,
            repo_id=self.args.repo_id,
+            log_to_wandb=self.args.log_to_wandb,
        )
        params.save(output_dir=self.args.project_name)
        if self.num_gpus == 1:
9 changes: 9 additions & 0 deletions src/autotrain/cli/run_llm.py
@@ -332,6 +332,13 @@ def register_subcommand(parser: ArgumentParser):
                "action": "store_true",
                "alias": ["--use-flash-attention-2", "--use-fa2"],
            },
+            {
+                "arg": "--log_to_wandb",
+                "help": "Use Weights & Biases tracking",
+                "required": False,
+                "action": "store_true",
+                "alias": ["--log-to-wandb"],
+            },
        ]
        run_llm_parser = parser.add_parser("llm", description="✨ Run AutoTrain LLM")
        for arg in arg_list:
@@ -372,6 +379,7 @@ def __init__(self, args):
            "use_int4",
            "merge_adapter",
            "use_flash_attention_2",
+            "log_to_wandb",
        ]
        for arg_name in store_true_arg_names:
            if getattr(self.args, arg_name) is None:
@@ -466,6 +474,7 @@ def run(self):
            merge_adapter=self.args.merge_adapter,
            username=self.args.username,
            use_flash_attention_2=self.args.use_flash_attention_2,
+            log_to_wandb=self.args.log_to_wandb,
        )

        # space training
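On the CLI side the flag is a plain argparse store_true option, so it is absent unless passed, and the command coerces missing store_true flags to False before building LLMTrainingParams. A rough, self-contained sketch of that flow; the command line in the comment is illustrative, and the default=None registration is an assumption about how the CLI wires these dicts into argparse:

# Illustrative invocation (flags other than --log_to_wandb are assumed, not shown in this diff):
#   autotrain llm --train --model gpt2 --project-name my-llm --log_to_wandb
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--log_to_wandb", required=False, action="store_true", default=None)
args = parser.parse_args(["--log_to_wandb"])

# Same coercion the command applies to every store_true argument:
for arg_name in ["log_to_wandb"]:
    if getattr(args, arg_name) is None:
        setattr(args, arg_name, False)

print(args.log_to_wandb)  # True when the flag is passed, False otherwise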
8 changes: 8 additions & 0 deletions src/autotrain/cli/run_text_classification.py
@@ -232,6 +232,12 @@ def register_subcommand(parser: ArgumentParser):
                "required": False,
                "type": str,
            },
+            {
+                "arg": "--log-to-wandb",
+                "help": "Use Weights & Biases tracking",
+                "required": False,
+                "action": "store_true",
+            },
        ]
        run_text_classification_parser = parser.add_parser(
            "text-classification", description="✨ Run AutoTrain Text Classification"
@@ -265,6 +271,7 @@ def __init__(self, args):
            "auto_find_batch_size",
            "fp16",
            "push_to_hub",
+            "log_to_wandb",
        ]
        for arg_name in store_true_arg_names:
            if getattr(self.args, arg_name) is None:
@@ -326,6 +333,7 @@ def run(self):
            repo_id=self.args.repo_id,
            token=self.args.token,
            username=self.args.username,
+            log_to_wandb=self.args.log_to_wandb,
        )

        if self.args.backend.startswith("spaces"):
1 change: 1 addition & 0 deletions src/autotrain/project.py
@@ -81,6 +81,7 @@ def _munge_common_params(self, job_idx):
        _params["repo_id"] = f"{self.username}/{self.project_name}-{job_idx}"
        _params["data_path"] = self.data_path
        _params["username"] = self.username
+        _params["log_to_wandb"] = True
        return _params

    def _munge_params_llm(self, job_idx):
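With this one-liner, every job created through the Project flow gets W&B logging switched on unconditionally. A hypothetical view of the common params dict a single job now receives; only the keys come from this hunk, the values are examples:

_params = {
    "repo_id": "soumik12345/my-project-0",  # f"{self.username}/{self.project_name}-{job_idx}"
    "data_path": "data/",                   # example value
    "username": "soumik12345",              # example value
    "log_to_wandb": True,                   # hard-coded on for jobs launched via Project
}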
6 changes: 5 additions & 1 deletion src/autotrain/trainers/clm/__main__.py
@@ -225,6 +225,10 @@ def train(config):

    else:
        logging_steps = config.logging_steps

+    report_to = "tensorboard"
+    if config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir=config.project_name,
@@ -237,7 +241,7 @@
        save_total_limit=config.save_total_limit,
        save_strategy=config.save_strategy,
        gradient_accumulation_steps=config.gradient_accumulation,
-        report_to="tensorboard",
+        report_to=report_to,
        auto_find_batch_size=config.auto_find_batch_size,
        lr_scheduler_type=config.scheduler,
        optim=config.optimizer,
1 change: 1 addition & 0 deletions src/autotrain/trainers/clm/params.py
@@ -45,6 +45,7 @@ class LLMTrainingParams(BaseModel):
    merge_adapter: bool = Field(False, title="Merge adapter")
    username: str = Field(None, title="Hugging Face Username")
    use_flash_attention_2: bool = Field(False, title="Use flash attention 2")
+    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")

    def save(self, output_dir):
        os.makedirs(output_dir, exist_ok=True)
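LLMTrainingParams is a Pydantic model, so the new field can also be set when driving the trainer from Python. A hypothetical snippet; the model and project_name values are examples, and the other fields are assumed to keep their defaults (only log_to_wandb and save() appear in this diff):

from autotrain.trainers.clm.params import LLMTrainingParams

params = LLMTrainingParams(
    model="gpt2",            # example value, not taken from this diff
    project_name="my-llm",   # example value
    log_to_wandb=True,       # new field added in this commit
)
params.save(output_dir=params.project_name)  # persists the params for the trainer entrypoint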
6 changes: 5 additions & 1 deletion src/autotrain/trainers/image_classification.py
@@ -233,6 +233,10 @@ def train(co2_tracker, payload, huggingface_token, model_path):
    fp16 = True
    if device == "cpu":
        fp16 = False

+    report_to = "none"
+    if job_config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir=model_path,
@@ -248,7 +252,7 @@ def train(co2_tracker, payload, huggingface_token, model_path):
        save_strategy="epoch",
        disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
        gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to="none",
+        report_to=report_to,
        auto_find_batch_size=True,
        lr_scheduler_type=job_config.scheduler,
        optim=job_config.optimizer,
6 changes: 5 additions & 1 deletion src/autotrain/trainers/image_classification/__main__.py
@@ -98,6 +98,10 @@ def train(config):

    else:
        logging_steps = config.logging_steps

+    report_to = "tensorboard"
+    if config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir=config.project_name,
@@ -111,7 +115,7 @@
        save_total_limit=config.save_total_limit,
        save_strategy=config.save_strategy,
        gradient_accumulation_steps=config.gradient_accumulation,
-        report_to="tensorboard",
+        report_to=report_to,
        auto_find_batch_size=config.auto_find_batch_size,
        lr_scheduler_type=config.scheduler,
        optim=config.optimizer,
1 change: 1 addition & 0 deletions src/autotrain/trainers/image_classification/params.py
@@ -30,6 +30,7 @@ class ImageClassificationParams(BaseModel):
    evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
    image_column: str = Field("image", title="Image column")
    target_column: str = Field("target", title="Target column")
+    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")

    def __str__(self):
        data = self.dict()
6 changes: 5 additions & 1 deletion src/autotrain/trainers/lm_trainer.py
@@ -387,6 +387,10 @@ def group_texts(examples):
    logging_steps = int(0.2 * len(valid_data) / job_config.train_batch_size)
    if logging_steps == 0:
        logging_steps = 1

+    report_to = "none"
+    if job_config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir=model_path,
@@ -400,7 +404,7 @@
        save_strategy="epoch",
        disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
        gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to="none",
+        report_to=report_to,
        auto_find_batch_size=True,
        lr_scheduler_type=job_config.scheduler,
        optim=job_config.optimizer,
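When report_to resolves to "wandb", the Hugging Face Trainer logs through its built-in W&B callback, which expects credentials and, optionally, a project name from the environment. A typical, illustrative setup before launching training; the project name is an example and nothing in this diff configures it:

import os
import wandb

# Example project name; transformers falls back to "huggingface" if WANDB_PROJECT is unset.
os.environ["WANDB_PROJECT"] = "autotrain-experiments"
wandb.login()  # or export WANDB_API_KEY before launching the training job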
6 changes: 5 additions & 1 deletion src/autotrain/trainers/text_classification.py
@@ -204,6 +204,10 @@ def train(co2_tracker, payload, huggingface_token, model_path):
    fp16 = True
    if model_config.model_type in FP32_MODELS or device == "cpu":
        fp16 = False

+    report_to = "none"
+    if job_config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir="/tmp/autotrain",
@@ -219,7 +223,7 @@
        save_strategy="epoch",
        disable_tqdm=not bool(os.environ.get("ENABLE_TQDM", 0)),
        gradient_accumulation_steps=job_config.gradient_accumulation_steps,
-        report_to="none",
+        report_to=report_to,
        auto_find_batch_size=True,
        lr_scheduler_type=job_config.scheduler,
        optim=job_config.optimizer,
6 changes: 5 additions & 1 deletion src/autotrain/trainers/text_classification/__main__.py
@@ -113,6 +113,10 @@ def train(config):

    else:
        logging_steps = config.logging_steps

+    report_to = "tensorboard"
+    if config.log_to_wandb:
+        report_to = "wandb"
+
    training_args = dict(
        output_dir=config.project_name,
@@ -126,7 +130,7 @@
        save_total_limit=config.save_total_limit,
        save_strategy=config.save_strategy,
        gradient_accumulation_steps=config.gradient_accumulation,
-        report_to="tensorboard",
+        report_to=report_to,
        auto_find_batch_size=config.auto_find_batch_size,
        lr_scheduler_type=config.scheduler,
        optim=config.optimizer,
1 change: 1 addition & 0 deletions src/autotrain/trainers/text_classification/params.py
@@ -34,6 +34,7 @@ class TextClassificationParams(BaseModel):
    repo_id: str = Field(None, title="Repo id")
    evaluation_strategy: str = Field("epoch", title="Evaluation strategy")
    username: str = Field(None, title="Hugging Face Username")
+    log_to_wandb: bool = Field(False, title="Logging using Weights & Biases")

    def __str__(self):
        data = self.dict()
