Commit 7a1c833
fixes
abhishekkrthakur committed Jun 5, 2024
1 parent 7d6acc7 commit 7a1c833
Showing 2 changed files with 36 additions and 5 deletions.
31 changes: 31 additions & 0 deletions configs/object_detection/local.yml
@@ -0,0 +1,31 @@
task: object_detection
base_model: facebook/detr-resnet-50
project_name: autotrain-obj-det-local-dataset
log: tensorboard
backend: local

data:
  path: data/ # this contains the train and validation folders
  train_split: train # folder name inside the data path; contains images and metadata.jsonl
  valid_split: validation # folder name inside the data path; contains images and metadata.jsonl (optional)
  column_mapping:
    image_column: image
    objects_column: objects

params:
  image_square_size: 600
  epochs: 100
  batch_size: 8
  lr: 5e-5
  weight_decay: 1e-4
  optimizer: adamw_torch
  scheduler: linear
  gradient_accumulation: 1
  mixed_precision: fp16
  early_stopping_patience: 50
  early_stopping_threshold: 0.001

hub:
  username: ${HF_USERNAME}
  token: ${HF_TOKEN}
  push_to_hub: true
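The config above expects `data/train/` and `data/validation/` folders, each holding images plus a `metadata.jsonl` describing them. As a minimal sketch, the snippet below writes such a file; the field names (`file_name`, `objects` with `bbox`/`categories`) follow the common Hugging Face imagefolder convention for object detection and are an assumption here — verify them against your AutoTrain version.

```python
import json
from pathlib import Path

# One record per image. Field names follow the HF imagefolder convention
# for object detection (assumed, not taken from this commit):
#   file_name  - image file relative to the split folder
#   objects    - parallel lists: one bbox and one category id per object
records = [
    {
        "file_name": "0001.jpg",
        "objects": {
            "bbox": [[10.0, 20.0, 100.0, 80.0]],  # one box per object
            "categories": [0],                     # class id per box
        },
    },
]

split_dir = Path("data/train")
split_dir.mkdir(parents=True, exist_ok=True)
with open(split_dir / "metadata.jsonl", "w") as f:
    for rec in records:
        f.write(json.dumps(rec) + "\n")
```

The same layout, repeated under `data/validation/`, covers the optional `valid_split` in the config.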
10 changes: 5 additions & 5 deletions docs/source/llm_finetuning.mdx
@@ -27,9 +27,9 @@ An example dataset for this format can be found here: https://huggingface.co/dat

For SFT/Generic training, your dataset must have a `text` column

-### Data Format For Reward/ORPO Trainer
+### Data Format For Reward Trainer

-For Reward/ORPO Trainer, the data should be in the following format:
+For Reward Trainer, the data should be in the following format:

| text | rejected_text |
|---------------------------------------------------------------|-------------------------------------------------------------------|
@@ -40,9 +40,9 @@ For Reward/ORPO Trainer, the data should be in the following format:

For Reward Trainer, your dataset must have a `text` column (aka chosen text) and a `rejected_text` column.

-### Data Format For DPO Trainer
+### Data Format For DPO/ORPO Trainer

-For DPO Trainer, the data should be in the following format:
+For DPO/ORPO Trainer, the data should be in the following format:

| prompt | text | rejected_text |
|-----------------------------------------|---------------------|--------------------|
@@ -54,7 +54,7 @@ For DPO Trainer, the data should be in the following format:
| Which is the best programming language? | Python | C++ |
| Which is the best programming language? | Java | C++ |

-For DPO Trainer, your dataset must have a `prompt` column, a `text` column (aka chosen text) and a `rejected_text` column.
+For DPO/ORPO Trainer, your dataset must have a `prompt` column, a `text` column (aka chosen text) and a `rejected_text` column.


For all tasks, you can use both CSV and JSONL files!
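Since both CSV and JSONL are accepted, a sketch of the DPO/ORPO table above as a JSONL training file might look like this; the column names (`prompt`, `text`, `rejected_text`) come from the docs, while the file name is illustrative.

```python
import json

# The example DPO/ORPO rows from the table above, one JSON object per line.
rows = [
    {"prompt": "Which is the best programming language?",
     "text": "Python",
     "rejected_text": "C++"},
    {"prompt": "Which is the best programming language?",
     "text": "Java",
     "rejected_text": "C++"},
]

with open("dpo_train.jsonl", "w") as f:  # illustrative file name
    for row in rows:
        f.write(json.dumps(row) + "\n")
```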
