diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index e26d7fdaca..3e74d4086b 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -10,20 +10,12 @@ title: Getting Started - sections: - local: quickstart_spaces - title: Quickstart - title: AutoTrain on Hugging Face Spaces -- sections: + title: Train on Spaces - local: quickstart - title: Quickstart + title: Train Locally - local: config - title: Configurations - title: Use AutoTrain Locally -- sections: - - local: col_map - title: Understanding Column Mapping - - local: autotrain_api - title: AutoTrain API - title: Miscellaneous + title: Config File + title: Quickstart - sections: - local: tasks/llm_finetuning title: LLM Finetuning @@ -47,16 +39,8 @@ title: Tabular title: Tasks - sections: - - local: params/extractive_qa_params - title: Extractive QA - - local: params/object_detection_params - title: Object Detection - - local: params/dreambooth_params - title: DreamBooth - - local: params/seq2seq_params - title: Seq2Seq - - local: params/token_classification_params - title: Token Classification - - local: params/tabular_params - title: Tabular - title: Parameters \ No newline at end of file + - local: col_map + title: Understanding Column Mapping + - local: autotrain_api + title: AutoTrain API + title: Miscellaneous \ No newline at end of file diff --git a/docs/source/params/dreambooth_params.mdx b/docs/source/params/dreambooth_params.mdx deleted file mode 100644 index 0c0056d2b4..0000000000 --- a/docs/source/params/dreambooth_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# DreamBooth Parameters - -[[autodoc]] trainers.dreambooth.params.DreamBoothTrainingParams \ No newline at end of file diff --git a/docs/source/params/extractive_qa_params.mdx b/docs/source/params/extractive_qa_params.mdx deleted file mode 100644 index de8e18871a..0000000000 --- a/docs/source/params/extractive_qa_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# Extractive Question Answering Parameters - -[[autodoc]] 
trainers.extractive_question_answering.params.ExtractiveQuestionAnsweringParams \ No newline at end of file diff --git a/docs/source/params/object_detection_params.mdx b/docs/source/params/object_detection_params.mdx deleted file mode 100644 index 59cee7acad..0000000000 --- a/docs/source/params/object_detection_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# Object Detection Parameters - -[[autodoc]] trainers.object_detection.params.ObjectDetectionParams diff --git a/docs/source/params/seq2seq_params.mdx b/docs/source/params/seq2seq_params.mdx deleted file mode 100644 index 82754114bf..0000000000 --- a/docs/source/params/seq2seq_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# Seq2Seq Parameters - -[[autodoc]] trainers.seq2seq.params.Seq2SeqParams diff --git a/docs/source/params/tabular_params.mdx b/docs/source/params/tabular_params.mdx deleted file mode 100644 index c99608bd58..0000000000 --- a/docs/source/params/tabular_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# Tabular Parameters - -[[autodoc]] trainers.tabular.params.TabularParams diff --git a/docs/source/params/token_classification_params.mdx b/docs/source/params/token_classification_params.mdx deleted file mode 100644 index 00fc66b18b..0000000000 --- a/docs/source/params/token_classification_params.mdx +++ /dev/null @@ -1,3 +0,0 @@ -# Token Classification Parameters - -[[autodoc]] trainers.token_classification.params.TokenClassificationParams diff --git a/docs/source/tasks/dreambooth.mdx b/docs/source/tasks/dreambooth.mdx index 9c5321567e..f8402a9ed8 100644 --- a/docs/source/tasks/dreambooth.mdx +++ b/docs/source/tasks/dreambooth.mdx @@ -33,3 +33,8 @@ This token acts as a unique identifier for your subject within the model. Typically, you will use a simple, descriptive keyword like prompt in the parameters section of your training setup. This token will be used to generate new images of your subject by the model. 
+ + +## Parameters + +[[autodoc]] trainers.dreambooth.params.DreamBoothTrainingParams \ No newline at end of file diff --git a/docs/source/tasks/extractive_qa.mdx b/docs/source/tasks/extractive_qa.mdx index e340b97bed..5a303e1a4b 100644 --- a/docs/source/tasks/extractive_qa.mdx +++ b/docs/source/tasks/extractive_qa.mdx @@ -28,4 +28,59 @@ Note: the preferred format for question answering is JSONL, if you want to use C Example dataset from Hugging Face Hub: [lhoestq/squad](https://huggingface.co/datasets/lhoestq/squad) -P.S. You can use both squad and squad v2 data format with correct column mappings. \ No newline at end of file +P.S. You can use both squad and squad v2 data format with correct column mappings. + +## Training Locally + +To train an Extractive QA model locally, you need a config file: + +```yaml +task: extractive-qa +base_model: google-bert/bert-base-uncased +project_name: autotrain-bert-ex-qa1 +log: tensorboard +backend: local + +data: + path: lhoestq/squad + train_split: train + valid_split: validation + column_mapping: + text_column: context + question_column: question + answer_column: answers + +params: + max_seq_length: 512 + max_doc_stride: 128 + epochs: 3 + batch_size: 4 + lr: 2e-5 + optimizer: adamw_torch + scheduler: linear + gradient_accumulation: 1 + mixed_precision: fp16 + +hub: + username: ${HF_USERNAME} + token: ${HF_TOKEN} + push_to_hub: true +``` + +To train the model, run the following command: + +```bash +$ autotrain --config config.yaml +``` + +Here, we are training a BERT model on the SQuAD dataset using the Extractive QA task. The model is trained for 3 epochs with a batch size of 4 and a learning rate of 2e-5. The training process is logged using TensorBoard. The model is trained locally and pushed to the Hugging Face Hub after training. 
+ +## Training on Hugging Face Spaces + +![AutoTrain Extractive Question Answering on Hugging Face Spaces](https://raw.githubusercontent.com/huggingface/autotrain-advanced/main/static/ext_qa.png) + +As always, pay special attention to column mapping. + +## Parameters + +[[autodoc]] trainers.extractive_question_answering.params.ExtractiveQuestionAnsweringParams \ No newline at end of file diff --git a/docs/source/tasks/object_detection.mdx b/docs/source/tasks/object_detection.mdx index a05d1bdc29..3d13309789 100644 --- a/docs/source/tasks/object_detection.mdx +++ b/docs/source/tasks/object_detection.mdx @@ -55,4 +55,8 @@ Some points to keep in mind: - There must not be any other files in the zip file. - There must not be any other folders inside the zip folder. -When train.zip is decompressed, it creates no folders: only images and metadata.jsonl. \ No newline at end of file +When train.zip is decompressed, it creates no folders: only images and metadata.jsonl. + +## Parameters + +[[autodoc]] trainers.object_detection.params.ObjectDetectionParams diff --git a/docs/source/tasks/seq2seq.mdx b/docs/source/tasks/seq2seq.mdx index e8505cdbcc..f798ea1606 100644 --- a/docs/source/tasks/seq2seq.mdx +++ b/docs/source/tasks/seq2seq.mdx @@ -30,3 +30,8 @@ Or as a JSONL file: ## Columns Your CSV/JSONL dataset must have two columns: `text` and `target`. + + +## Parameters + +[[autodoc]] trainers.seq2seq.params.Seq2SeqParams diff --git a/docs/source/tasks/tabular.mdx b/docs/source/tasks/tabular.mdx index 45374c9bf5..99bb980184 100644 --- a/docs/source/tasks/tabular.mdx +++ b/docs/source/tasks/tabular.mdx @@ -42,3 +42,8 @@ id,category1,category2,feature1,target ## Columns Your CSV dataset must have two columns: `id` and `target`. 
+ + +## Parameters + +[[autodoc]] trainers.tabular.params.TabularParams diff --git a/docs/source/tasks/token_classification.mdx b/docs/source/tasks/token_classification.mdx index 704b6ec93f..d4a2b46db3 100644 --- a/docs/source/tasks/token_classification.mdx +++ b/docs/source/tasks/token_classification.mdx @@ -54,3 +54,8 @@ for chunk in pd.read_csv('example.csv', chunksize=chunk_size): ## Columns Your CSV/JSONL dataset must have two columns: `tokens` and `tags`. + + +## Parameters + +[[autodoc]] trainers.token_classification.params.TokenClassificationParams diff --git a/static/ext_qa.png b/static/ext_qa.png new file mode 100644 index 0000000000..215a5cbf6e Binary files /dev/null and b/static/ext_qa.png differ