Commit eaaf8a2
add configs
abhishekkrthakur committed May 3, 2024
1 parent f39b93c
Showing 15 changed files with 282 additions and 40 deletions.
24 changes: 24 additions & 0 deletions configs/dreambooth/sd15_colab.yml
@@ -0,0 +1,24 @@
task: dreambooth
base_model: runwayml/stable-diffusion-v1-5
project_name: autotrain-sd15-finetuned
backend: local-cli

data:
  path: data/ # store all images in this folder
  prompt: photo of sks person # prompt for the model

params:
  resolution: 512
  batch_size: 1
  num_steps: 500
  lr: 1e-4
  gradient_accumulation: 4
  mixed_precision: fp16
  train_text_encoder: false
  xformers: false
  use_8bit_adam: false

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
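Usage note (not part of the diff): the change to src/autotrain/cli/autotrain.py later in this commit routes --config files through AutoTrainConfigParser, so a config like the one above can also be launched from Python. A minimal sketch, assuming the package is installed and HF_USERNAME / HF_TOKEN are available; the credential values here are placeholders:

```python
import os

from autotrain.parser import AutoTrainConfigParser

# The hub section above references ${HF_USERNAME} and ${HF_TOKEN}, so both
# are expected in the environment before the job is launched.
os.environ["HF_USERNAME"] = "your-hf-username"  # placeholder
os.environ["HF_TOKEN"] = "hf_xxx"               # placeholder

# Parse the YAML config and run the DreamBooth job, mirroring what
# `autotrain --config configs/dreambooth/sd15_colab.yml` does via the CLI.
parser = AutoTrainConfigParser("configs/dreambooth/sd15_colab.yml")
parser.run()
```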
25 changes: 25 additions & 0 deletions configs/dreambooth/sdxl_colab.yml
@@ -0,0 +1,25 @@
task: dreambooth
base_model: stabilityai/stable-diffusion-xl-base-1.0
project_name: autotrain-sdxl-finetuned
backend: local-cli

data:
  path: data/ # store all images in this folder
  prompt: photo of sks person # prompt for the model

params:
  resolution: 1024
  batch_size: 1
  num_steps: 500
  lr: 1e-4
  gradient_accumulation: 4
  mixed_precision: fp16
  train_text_encoder: false
  xformers: false
  use_8bit_adam: false
  xl: true

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
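Apart from the project name, the differences from the SD 1.5 config are the base model, the 1024 px resolution, and the xl: true flag. How the parser resolves the ${HF_USERNAME} / ${HF_TOKEN} placeholders is not shown in this diff; below is a minimal sketch of one way such placeholders can be expanded from the environment (illustrative only, not AutoTrain's actual implementation):

```python
import os

import yaml  # pip install pyyaml


def load_config(path: str) -> dict:
    """Read a YAML config and expand ${VAR} placeholders from the environment."""
    with open(path) as f:
        raw = f.read()
    # os.path.expandvars swaps ${HF_USERNAME} / ${HF_TOKEN} for the matching
    # environment variables; unset variables are left as-is.
    return yaml.safe_load(os.path.expandvars(raw))


cfg = load_config("configs/dreambooth/sdxl_colab.yml")
print(cfg["params"]["resolution"])  # 1024 for the SDXL variant
print(cfg["hub"]["username"])       # resolved from HF_USERNAME if it is set
```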
27 changes: 27 additions & 0 deletions configs/image_classification/hub_dataset.yml
@@ -0,0 +1,27 @@
task: image_classification
base_model: google/vit-base-patch16-224
project_name: autotrain-cats-vs-dogs-finetuned
log: tensorboard
backend: local-cli

data:
  path: cats_vs_dogs
  train_split: train
  valid_split: null
  column_mapping:
    image_column: image
    target_column: labels

params:
  epochs: 2
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
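The column_mapping block ties the config to the cats_vs_dogs dataset on the Hub. A purely illustrative sanity check with the datasets library (assuming the dataset still exposes image and labels columns):

```python
from datasets import load_dataset

# Illustrative check, not part of this commit: confirm the columns named in
# column_mapping ("image" and "labels") exist in the cats_vs_dogs train split.
ds = load_dataset("cats_vs_dogs", split="train")
print(ds.column_names)        # expected to include 'image' and 'labels'
print(ds.features["labels"])  # class-label definition (cat / dog)
print(ds[0]["labels"])        # label id of the first example
```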
2 changes: 1 addition & 1 deletion configs/llm_finetuning/gpt2_sft.yml
@@ -1,4 +1,4 @@
-task: lm_training
+task: llm
base_model: openai-community/gpt2
project_name: autotrain-gpt2-finetuned-guanaco
log: tensorboard
2 changes: 1 addition & 1 deletion configs/llm_finetuning/llama3-8b-orpo.yml
@@ -1,4 +1,4 @@
-task: lm_training
+task: llm
base_model: meta-llama/Meta-Llama-3-8B-Instruct
project_name: llama3-8b-orpo
log: tensorboard
@@ -1,4 +1,4 @@
-task: text_multi_class_classification
+task: text_classification
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-imdb-finetuned
log: tensorboard
28 changes: 28 additions & 0 deletions configs/text_classification/local_dataset.yml
@@ -0,0 +1,28 @@
task: text_classification
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-imdb-finetuned
log: tensorboard
backend: local-cli

data:
  path: data/ # this must be the path to the directory containing the train and valid files
  train_split: train # this must be either train.csv or train.json
  valid_split: valid # this must be either valid.csv or valid.json
  column_mapping:
    text_column: text # this must be the name of the column containing the text
    target_column: label # this must be the name of the column containing the target

params:
  max_seq_length: 512
  epochs: 3
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
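For reference, the local layout the comments describe is a data/ directory containing train.csv and valid.csv with the columns named in column_mapping. A minimal stand-in created with pandas; the rows are hypothetical:

```python
import os

import pandas as pd

# Hypothetical stand-in for the layout described above: data/train.csv and
# data/valid.csv, each with the "text" and "label" columns from column_mapping.
os.makedirs("data", exist_ok=True)

train = pd.DataFrame(
    {
        "text": ["a genuinely moving film", "two hours I will never get back"],
        "label": ["positive", "negative"],
    }
)
valid = pd.DataFrame(
    {
        "text": ["predictable but charming"],
        "label": ["positive"],
    }
)
train.to_csv("data/train.csv", index=False)
valid.to_csv("data/valid.csv", index=False)
```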
28 changes: 28 additions & 0 deletions configs/text_regression/hub_dataset.yml
@@ -0,0 +1,28 @@
task: text_regression
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-sms-spam-finetuned
log: tensorboard
backend: local-cli

data:
  path: sms_spam
  train_split: train
  valid_split: null
  column_mapping:
    text_column: sms
    target_column: label

params:
  max_seq_length: 512
  epochs: 3
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
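Text regression expects a numeric target, and sms_spam's label column is a 0/1 class id, which serves as the float target in this example. An illustrative inspection of the mapped columns (assuming the dataset loads with its default configuration):

```python
from datasets import load_dataset

# Illustrative check, not part of this commit: verify the mapped columns and
# confirm the regression target is numeric.
ds = load_dataset("sms_spam", split="train")
print(ds.column_names)           # expected: ['sms', 'label']
print(sorted(set(ds["label"])))  # expected: [0, 1]
```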
28 changes: 28 additions & 0 deletions configs/text_regression/local_dataset.yml
@@ -0,0 +1,28 @@
task: text_regression
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-custom-finetuned
log: tensorboard
backend: local-cli

data:
  path: data/ # this must be the path to the directory containing the train and valid files
  train_split: train # this must be either train.csv or train.json
  valid_split: valid # this must be either valid.csv or valid.json
  column_mapping:
    text_column: text # this must be the name of the column containing the text
    target_column: label # this must be the name of the column containing the target

params:
  max_seq_length: 512
  epochs: 3
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
28 changes: 28 additions & 0 deletions configs/token_classification/hub_dataset.yml
@@ -0,0 +1,28 @@
task: token_classification
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-conll2003-finetuned
log: tensorboard
backend: local-cli

data:
  path: conll2003
  train_split: train
  valid_split: validation
  column_mapping:
    tokens_column: tokens
    tags_column: ner_tags

params:
  max_seq_length: 512
  epochs: 3
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
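Here the mapping points at pre-tokenized columns: tokens holds word lists and ner_tags holds integer tag ids. An illustrative peek at one conll2003 row, including decoding the ids back to tag names (assuming the dataset loads with default settings):

```python
from datasets import load_dataset

# Illustrative peek, not part of this commit, at the columns mapped above.
ds = load_dataset("conll2003", split="train")
example = ds[0]
print(example["tokens"])    # list of tokens for the first sentence
print(example["ner_tags"])  # matching list of integer NER tag ids

# Decode tag ids to names (O, B-PER, I-ORG, ...).
tag_names = ds.features["ner_tags"].feature.names
print([tag_names[i] for i in example["ner_tags"]])
```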
28 changes: 28 additions & 0 deletions configs/token_classification/local_dataset.yml
@@ -0,0 +1,28 @@
task: token_classification
base_model: google-bert/bert-base-uncased
project_name: autotrain-bert-custom-finetuned
log: tensorboard
backend: local-cli

data:
  path: data/ # this must be the path to the directory containing the train and valid files
  train_split: train # this must be train.json
  valid_split: valid # this must be valid.json; it can also be set to null
  column_mapping:
    text_column: text # this must be the name of the column containing the text
    target_column: label # this must be the name of the column containing the target

params:
  max_seq_length: 512
  epochs: 3
  batch_size: 4
  lr: 2e-5
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
2 changes: 1 addition & 1 deletion src/autotrain/__init__.py
@@ -41,4 +41,4 @@


logger = Logger().get_logger()
-__version__ = "0.7.77.dev0"
+__version__ = "0.7.78.dev0"
4 changes: 2 additions & 2 deletions src/autotrain/cli/autotrain.py
@@ -14,7 +14,7 @@
from autotrain.cli.run_text_regression import RunAutoTrainTextRegressionCommand
from autotrain.cli.run_token_classification import RunAutoTrainTokenClassificationCommand
from autotrain.cli.run_tools import RunAutoTrainToolsCommand
-from autotrain.configparser import ConfigParser
+from autotrain.parser import AutoTrainConfigParser


def main():
@@ -50,7 +50,7 @@ def main():

    if args.config:
        logger.info(f"Using AutoTrain configuration: {args.config}")
-        cp = ConfigParser(args.config)
+        cp = AutoTrainConfigParser(args.config)
        cp.run()
        exit(0)

10 changes: 5 additions & 5 deletions src/autotrain/cli/utils.py
@@ -380,11 +380,11 @@ def token_clf_munge_data(params, local):

def img_clf_munge_data(params, local):
    train_data_path = f"{params.data_path}/{params.train_split}"
-    if params.valid_split is not None:
-        valid_data_path = f"{params.data_path}/{params.valid_split}"
-    else:
-        valid_data_path = None
-    if os.path.isdir(train_data_path) or os.path.isdir(valid_data_path):
+    # if params.valid_split is not None:
+    #     valid_data_path = f"{params.data_path}/{params.valid_split}"
+    # else:
+    #     valid_data_path = None
+    if os.path.isdir(train_data_path):
        raise Exception("Image classification is not yet supported for local datasets using the CLI. Please use UI.")
    return params
