From f1f90405cab5925efebf78b6cab19f1dc27136b9 Mon Sep 17 00:00:00 2001
From: abhishek thakur <1183441+abhishekkrthakur@users.noreply.github.com>
Date: Thu, 13 Jun 2024 17:54:25 +0200
Subject: [PATCH] Image scoring (#680)
---
configs/image_classification/local.yml | 27 +++
configs/image_scoring/hub_dataset.yml | 27 +++
configs/image_scoring/local.yml | 28 +++
docs/source/_toctree.yml | 4 +
docs/source/image_regression.mdx | 58 +++++
docs/source/image_regression_params.mdx | 3 +
docs/source/object_detection.mdx | 4 +-
src/autotrain/app/api_routes.py | 20 ++
src/autotrain/app/models.py | 1 +
src/autotrain/app/params.py | 37 +++
src/autotrain/app/templates/index.html | 5 +
src/autotrain/app/ui_routes.py | 13 +
src/autotrain/backends/base.py | 4 +
src/autotrain/cli/autotrain.py | 2 +
src/autotrain/cli/run_image_regression.py | 104 ++++++++
src/autotrain/cli/utils.py | 23 ++
src/autotrain/commands.py | 16 +-
src/autotrain/dataset.py | 82 ++++++-
src/autotrain/parser.py | 9 +
src/autotrain/preprocessor/vision.py | 150 ++++++++++++
src/autotrain/project.py | 3 +
.../trainers/image_regression/__init__.py | 0
.../trainers/image_regression/__main__.py | 222 ++++++++++++++++++
.../trainers/image_regression/dataset.py | 24 ++
.../trainers/image_regression/params.py | 36 +++
.../trainers/image_regression/utils.py | 130 ++++++++++
.../trainers/text_regression/__main__.py | 7 +-
src/autotrain/utils.py | 3 +
28 files changed, 1036 insertions(+), 6 deletions(-)
create mode 100644 configs/image_classification/local.yml
create mode 100644 configs/image_scoring/hub_dataset.yml
create mode 100644 configs/image_scoring/local.yml
create mode 100644 docs/source/image_regression.mdx
create mode 100644 docs/source/image_regression_params.mdx
create mode 100644 src/autotrain/cli/run_image_regression.py
create mode 100644 src/autotrain/trainers/image_regression/__init__.py
create mode 100644 src/autotrain/trainers/image_regression/__main__.py
create mode 100644 src/autotrain/trainers/image_regression/dataset.py
create mode 100644 src/autotrain/trainers/image_regression/params.py
create mode 100644 src/autotrain/trainers/image_regression/utils.py
diff --git a/configs/image_classification/local.yml b/configs/image_classification/local.yml
new file mode 100644
index 0000000000..5d04f88332
--- /dev/null
+++ b/configs/image_classification/local.yml
@@ -0,0 +1,27 @@
+task: image_classification
+base_model: google/vit-base-patch16-224
+project_name: autotrain-image-classification-model
+log: tensorboard
+backend: local
+
+data:
+ path: data/
+ train_split: train # this folder inside data/ will be used for training, it contains the images in subfolders.
+ valid_split: null
+ column_mapping:
+ image_column: image
+ target_column: labels
+
+params:
+ epochs: 2
+ batch_size: 4
+ lr: 2e-5
+ optimizer: adamw_torch
+ scheduler: linear
+ gradient_accumulation: 1
+ mixed_precision: fp16
+
+hub:
+ username: ${HF_USERNAME}
+ token: ${HF_TOKEN}
+ push_to_hub: true
\ No newline at end of file
diff --git a/configs/image_scoring/hub_dataset.yml b/configs/image_scoring/hub_dataset.yml
new file mode 100644
index 0000000000..bdd2d76473
--- /dev/null
+++ b/configs/image_scoring/hub_dataset.yml
@@ -0,0 +1,27 @@
+task: image_regression
+base_model: google/vit-base-patch16-224
+project_name: autotrain-cats-vs-dogs-finetuned
+log: tensorboard
+backend: local
+
+data:
+ path: cats_vs_dogs
+ train_split: train
+ valid_split: null
+ column_mapping:
+ image_column: image
+ target_column: labels
+
+params:
+ epochs: 2
+ batch_size: 4
+ lr: 2e-5
+ optimizer: adamw_torch
+ scheduler: linear
+ gradient_accumulation: 1
+ mixed_precision: fp16
+
+hub:
+ username: ${HF_USERNAME}
+ token: ${HF_TOKEN}
+ push_to_hub: true
\ No newline at end of file
diff --git a/configs/image_scoring/local.yml b/configs/image_scoring/local.yml
new file mode 100644
index 0000000000..377cf227a7
--- /dev/null
+++ b/configs/image_scoring/local.yml
@@ -0,0 +1,28 @@
+task: image_regression
+base_model: google/vit-base-patch16-224
+project_name: autotrain-image-regression-model
+log: tensorboard
+backend: local
+
+data:
+ path: data/
+ train_split: train # this folder inside data/ will be used for training, it contains the images and metadata.jsonl
+ valid_split: valid # this folder inside data/ will be used for validation, it contains the images and metadata.jsonl. can be set to null
+ # column mapping should not be changed for local datasets
+ column_mapping:
+ image_column: image
+ target_column: target
+
+params:
+ epochs: 2
+ batch_size: 4
+ lr: 2e-5
+ optimizer: adamw_torch
+ scheduler: linear
+ gradient_accumulation: 1
+ mixed_precision: fp16
+
+hub:
+ username: ${HF_USERNAME}
+ token: ${HF_TOKEN}
+ push_to_hub: true
\ No newline at end of file
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 9b599f265a..839751992c 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -35,6 +35,8 @@
title: LLM Finetuning
- local: image_classification
title: Image Classification
+ - local: image_regression
+ title: Image Scoring/Regression
- local: object_detection
title: Object Detection
- local: dreambooth
@@ -53,6 +55,8 @@
title: LLM Finetuning
- local: image_classification_params
title: Image Classification
+ - local: image_regression_params
+ title: Image Scoring/Regression
- local: object_detection_params
title: Object Detection
- local: dreambooth_params
diff --git a/docs/source/image_regression.mdx b/docs/source/image_regression.mdx
new file mode 100644
index 0000000000..d6fb25fc38
--- /dev/null
+++ b/docs/source/image_regression.mdx
@@ -0,0 +1,58 @@
+# Image Scoring/Regression
+
+Image scoring is a form of supervised learning where a model is trained to predict a
+score or value for an image. AutoTrain simplifies the process, enabling you to train a
+state-of-the-art image scoring model by simply uploading labeled example images.
+
+
+## Preparing your data
+
+To ensure your image scoring model trains effectively, follow these guidelines for preparing your data:
+
+
+### Organizing Images
+
+
+Prepare a zip file containing your images and metadata.jsonl.
+
+
+```
+Archive.zip
+├── 0001.png
+├── 0002.png
+├── 0003.png
+├── .
+├── .
+├── .
+└── metadata.jsonl
+```
+
+Example for `metadata.jsonl`:
+
+```
+{"file_name": "0001.png", "target": 0.5}
+{"file_name": "0002.png", "target": 0.7}
+{"file_name": "0003.png", "target": 0.3}
+```
+
+Please note that metadata.jsonl should contain the `file_name` and the `target` value for each image.
+
+
+### Image Requirements
+
+- Format: Ensure all images are in JPEG, JPG, or PNG format.
+
+- Quantity: Include at least 5 images to provide the model with sufficient examples for learning.
+
+- Exclusivity: The zip file should exclusively contain images and metadata.jsonl.
+No additional files or nested folders should be included.
+
+
+Some points to keep in mind:
+
+- The images must be jpeg, jpg or png.
+- There should be at least 5 images.
+- There must not be any other files in the zip file.
+- There must not be any other folders inside the zip folder.
+
+When the zip file is decompressed, it should create no folders: only the images and metadata.jsonl.
\ No newline at end of file
diff --git a/docs/source/image_regression_params.mdx b/docs/source/image_regression_params.mdx
new file mode 100644
index 0000000000..8434a0ab9d
--- /dev/null
+++ b/docs/source/image_regression_params.mdx
@@ -0,0 +1,3 @@
+# Image Scoring/Regression Parameters
+
+The parameters for image scoring/regression are the same as the parameters for image classification.
diff --git a/docs/source/object_detection.mdx b/docs/source/object_detection.mdx
index b8e6814d84..a05d1bdc29 100644
--- a/docs/source/object_detection.mdx
+++ b/docs/source/object_detection.mdx
@@ -50,10 +50,8 @@ No additional files or nested folders should be included.
Some points to keep in mind:
-- The zip file should contain multiple folders (the classes), each folder should contain images of a single class.
-- The name of the folder should be the name of the class.
- The images must be jpeg, jpg or png.
-- There should be at least 5 images per class.
+- There should be at least 5 images per split.
- There must not be any other files in the zip file.
- There must not be any other folders inside the zip folder.
diff --git a/src/autotrain/app/api_routes.py b/src/autotrain/app/api_routes.py
index f612605be7..c1db5b0a89 100644
--- a/src/autotrain/app/api_routes.py
+++ b/src/autotrain/app/api_routes.py
@@ -13,6 +13,7 @@
from autotrain.trainers.clm.params import LLMTrainingParams
from autotrain.trainers.dreambooth.params import DreamBoothTrainingParams
from autotrain.trainers.image_classification.params import ImageClassificationParams
+from autotrain.trainers.image_regression.params import ImageRegressionParams
from autotrain.trainers.sent_transformers.params import SentenceTransformersParams
from autotrain.trainers.seq2seq.params import Seq2SeqParams
from autotrain.trainers.tabular.params import TabularParams
@@ -86,6 +87,7 @@ def create_api_base_model(base_class, class_name):
TextRegressionParamsAPI = create_api_base_model(TextRegressionParams, "TextRegressionParamsAPI")
TokenClassificationParamsAPI = create_api_base_model(TokenClassificationParams, "TokenClassificationParamsAPI")
SentenceTransformersParamsAPI = create_api_base_model(SentenceTransformersParams, "SentenceTransformersParamsAPI")
+ImageRegressionParamsAPI = create_api_base_model(ImageRegressionParams, "ImageRegressionParamsAPI")
class LLMSFTColumnMapping(BaseModel):
@@ -122,6 +124,11 @@ class ImageClassificationColumnMapping(BaseModel):
target_column: str
+class ImageRegressionColumnMapping(BaseModel):
+ image_column: str
+ target_column: str
+
+
class Seq2SeqColumnMapping(BaseModel):
text_column: str
target_column: str
@@ -201,6 +208,7 @@ class APICreateProjectModel(BaseModel):
"text-regression",
"tabular-classification",
"tabular-regression",
+ "image-regression",
]
base_model: str
hardware: Literal[
@@ -232,6 +240,7 @@ class APICreateProjectModel(BaseModel):
TextClassificationParamsAPI,
TextRegressionParamsAPI,
TokenClassificationParamsAPI,
+ ImageRegressionParamsAPI,
]
username: str
column_mapping: Optional[
@@ -254,6 +263,7 @@ class APICreateProjectModel(BaseModel):
STPairScoreColumnMapping,
STTripletColumnMapping,
STQAColumnMapping,
+ ImageRegressionColumnMapping,
]
] = None
hub_dataset: str
@@ -408,6 +418,14 @@ def validate_column_mapping(cls, values):
if not values.get("column_mapping").get("sentence2_column"):
raise ValueError("sentence2_column is required for st:qa")
values["column_mapping"] = STQAColumnMapping(**values["column_mapping"])
+ elif values.get("task") == "image-regression":
+ if not values.get("column_mapping"):
+ raise ValueError("column_mapping is required for image-regression")
+ if not values.get("column_mapping").get("image_column"):
+ raise ValueError("image_column is required for image-regression")
+ if not values.get("column_mapping").get("target_column"):
+ raise ValueError("target_column is required for image-regression")
+ values["column_mapping"] = ImageRegressionColumnMapping(**values["column_mapping"])
return values
@model_validator(mode="before")
@@ -441,6 +459,8 @@ def validate_params(cls, values):
values["params"] = TokenClassificationParamsAPI(**values["params"])
elif values.get("task").startswith("st:"):
values["params"] = SentenceTransformersParamsAPI(**values["params"])
+ elif values.get("task") == "image-regression":
+ values["params"] = ImageRegressionParamsAPI(**values["params"])
return values
diff --git a/src/autotrain/app/models.py b/src/autotrain/app/models.py
index fe5d2e5f5a..a6e30544fb 100644
--- a/src/autotrain/app/models.py
+++ b/src/autotrain/app/models.py
@@ -316,6 +316,7 @@ def fetch_models():
_mc["text-classification"] = _fetch_text_classification_models()
_mc["llm"] = _fetch_llm_models()
_mc["image-classification"] = _fetch_image_classification_models()
+ _mc["image-regression"] = _fetch_image_classification_models()
_mc["dreambooth"] = _fetch_dreambooth_models()
_mc["seq2seq"] = _fetch_seq2seq_models()
_mc["token-classification"] = _fetch_token_classification_models()
diff --git a/src/autotrain/app/params.py b/src/autotrain/app/params.py
index edbdbfef54..0b822c1610 100644
--- a/src/autotrain/app/params.py
+++ b/src/autotrain/app/params.py
@@ -5,6 +5,7 @@
from autotrain.trainers.clm.params import LLMTrainingParams
from autotrain.trainers.dreambooth.params import DreamBoothTrainingParams
from autotrain.trainers.image_classification.params import ImageClassificationParams
+from autotrain.trainers.image_regression.params import ImageRegressionParams
from autotrain.trainers.object_detection.params import ObjectDetectionParams
from autotrain.trainers.sent_transformers.params import SentenceTransformersParams
from autotrain.trainers.seq2seq.params import Seq2SeqParams
@@ -126,6 +127,10 @@
mixed_precision="fp16",
log="tensorboard",
).model_dump()
+PARAMS["image-regression"] = ImageRegressionParams(
+ mixed_precision="fp16",
+ log="tensorboard",
+).model_dump()
@dataclass
@@ -168,6 +173,8 @@ def munge(self):
return self._munge_params_text_reg()
elif self.task.startswith("st:"):
return self._munge_params_sent_transformers()
+ elif self.task == "image-regression":
+ return self._munge_params_img_reg()
else:
raise ValueError(f"Unknown task: {self.task}")
@@ -315,6 +322,22 @@ def _munge_params_img_clf(self):
return ImageClassificationParams(**_params)
+ def _munge_params_img_reg(self):
+ _params = self._munge_common_params()
+ _params["model"] = self.base_model
+ _params["log"] = "tensorboard"
+ if not self.using_hub_dataset:
+ _params["image_column"] = "autotrain_image"
+ _params["target_column"] = "autotrain_label"
+ _params["valid_split"] = "validation"
+ else:
+ _params["image_column"] = self.column_mapping.get("image" if not self.api else "image_column", "image")
+ _params["target_column"] = self.column_mapping.get("target" if not self.api else "target_column", "target")
+ _params["train_split"] = self.train_split
+ _params["valid_split"] = self.valid_split
+
+ return ImageRegressionParams(**_params)
+
def _munge_params_img_obj_det(self):
_params = self._munge_common_params()
_params["model"] = self.base_model
@@ -511,6 +534,20 @@ def get_task_params(task, param_type):
"early_stopping_threshold",
]
task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params}
+ if task == "image-regression" and param_type == "basic":
+ more_hidden_params = [
+ "warmup_ratio",
+ "weight_decay",
+ "max_grad_norm",
+ "seed",
+ "logging_steps",
+ "auto_find_batch_size",
+ "save_total_limit",
+ "evaluation_strategy",
+ "early_stopping_patience",
+ "early_stopping_threshold",
+ ]
+ task_params = {k: v for k, v in task_params.items() if k not in more_hidden_params}
if task == "image-object-detection" and param_type == "basic":
more_hidden_params = [
"warmup_ratio",
diff --git a/src/autotrain/app/templates/index.html b/src/autotrain/app/templates/index.html
index 33995a6f91..397df5a410 100644
--- a/src/autotrain/app/templates/index.html
+++ b/src/autotrain/app/templates/index.html
@@ -76,6 +76,10 @@
fields = ['image', 'label'];
fieldNames = ['image', 'label'];
break;
+ case 'image-regression':
+ fields = ['image', 'label'];
+ fieldNames = ['image', 'target'];
+ break;
case 'image-object-detection':
fields = ['image', 'objects'];
fieldNames = ['image', 'objects'];
@@ -200,6 +204,7 @@