diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 2b64d49a..822a10af 100755
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -1,14 +1,17 @@
version: 2
+build:
+ os: ubuntu-22.04
+ tools:
+ python: "3.9"
+
python:
- version: 3.8
install:
- method: pip
path: .
extra_requirements:
- docs
- pytorch_m1
- - tensorflow
sphinx:
fail_on_warning: true
\ No newline at end of file
diff --git a/README.md b/README.md
index f919f978..a03d365b 100755
--- a/README.md
+++ b/README.md
@@ -9,6 +9,10 @@
[![status](https://joss.theoj.org/papers/447561ee2de4709eddb704e18bee846f/status.svg)](https://joss.theoj.org/papers/447561ee2de4709eddb704e18bee846f)
+---
+### :tada: Zoobot 2.0 is now available. Bigger and better models with streamlined finetuning. [Blog](https://walmsley.dev/posts/zoobot-scaling-laws), [paper](https://arxiv.org/abs/2404.02973) :tada:
+---
+
Zoobot classifies galaxy morphology with deep learning.
@@ -17,11 +21,13 @@ Zoobot is trained using millions of answers by Galaxy Zoo volunteers. This code
- [Install](#installation)
- [Quickstart](#quickstart)
- [Worked Examples](#worked-examples)
-- [Pretrained Weights](https://zoobot.readthedocs.io/en/latest/data_notes.html)
+- [Pretrained Weights](https://zoobot.readthedocs.io/en/latest/pretrained_models.html)
- [Datasets](https://www.github.com/mwalmsley/galaxy-datasets)
- [Documentation](https://zoobot.readthedocs.io/) (for understanding/reference)
+- [Mailing List](https://groups.google.com/g/zoobot) (for updates)
## Installation
+
You can retrain Zoobot in the cloud with a free GPU using this [Google Colab notebook](https://colab.research.google.com/drive/1A_-M3Sz5maQmyfW2A7rEu-g_Zi0RMGz5?usp=sharing). To install locally, keep reading.
@@ -47,6 +53,7 @@ To use a GPU, you must *already* have CUDA installed and matching the versions a
I share my install steps [here](#install_cuda). GPUs are optional - Zoobot will run retrain fine on CPU, just slower.
## Quickstart
+
The [Colab notebook](https://colab.research.google.com/drive/1A_-M3Sz5maQmyfW2A7rEu-g_Zi0RMGz5?usp=sharing) is the quickest way to get started. Alternatively, the minimal example below illustrates how Zoobot works.
@@ -54,59 +61,59 @@ The [Colab notebook](https://colab.research.google.com/drive/1A_-M3Sz5maQmyfW2A7
Let's say you want to find ringed galaxies and you have a small labelled dataset of 500 ringed or not-ringed galaxies. You can retrain Zoobot to find rings like so:
```python
+import pandas as pd
+from galaxy_datasets.pytorch.galaxy_datamodule import GalaxyDataModule
+from zoobot.pytorch.training import finetune
+
+# csv with 'ring' column (0 or 1) and 'file_loc' column (path to image)
+labelled_df = pd.read_csv('/your/path/some_labelled_galaxies.csv')
+
+datamodule = GalaxyDataModule(
+ label_cols=['ring'],
+ catalog=labelled_df,
+ batch_size=32
+)
+
+# load trained Zoobot model
+model = finetune.FinetuneableZoobotClassifier(checkpoint_loc, num_classes=2)
- import pandas as pd
- from galaxy_datasets.pytorch.galaxy_datamodule import GalaxyDataModule
- from zoobot.pytorch.training import finetune
-
- # csv with 'ring' column (0 or 1) and 'file_loc' column (path to image)
- labelled_df = pd.read_csv('/your/path/some_labelled_galaxies.csv')
-
- datamodule = GalaxyDataModule(
- label_cols=['ring'],
- catalog=labelled_df,
- batch_size=32
- )
-
- # load trained Zoobot model
- model = finetune.FinetuneableZoobotClassifier(checkpoint_loc, num_classes=2)
-
- # retrain to find rings
- trainer = finetune.get_trainer(save_dir)
- trainer.fit(model, datamodule)
+# retrain to find rings
+trainer = finetune.get_trainer(save_dir)
+trainer.fit(model, datamodule)
```
Then you can make predict if new galaxies have rings:
```python
- from zoobot.pytorch.predictions import predict_on_catalog
+from zoobot.pytorch.predictions import predict_on_catalog
- # csv with 'file_loc' column (path to image). Zoobot will predict the labels.
- unlabelled_df = pd.read_csv('/your/path/some_unlabelled_galaxies.csv')
+# csv with 'file_loc' column (path to image). Zoobot will predict the labels.
+unlabelled_df = pd.read_csv('/your/path/some_unlabelled_galaxies.csv')
- predict_on_catalog.predict(
- unlabelled_df,
- model,
- label_cols=['ring'], # only used for
- save_loc='/your/path/finetuned_predictions.csv'
- )
+predict_on_catalog.predict(
+ unlabelled_df,
+ model,
+ label_cols=['ring'], # only used for naming the output columns
+ save_loc='/your/path/finetuned_predictions.csv'
+)
```
Zoobot includes many guides and working examples - see the [Getting Started](#getting-started) section below.
## Getting Started
+
I suggest starting with the [Colab notebook](https://colab.research.google.com/drive/1A_-M3Sz5maQmyfW2A7rEu-g_Zi0RMGz5?usp=sharing) or the worked examples below, which you can copy and adapt.
For context and explanation, see the [documentation](https://zoobot.readthedocs.io/).
-For pretrained model weights, precalculated representations, catalogues, and so forth, see the [data notes](https://zoobot.readthedocs.io/en/latest/data_notes.html) in particular.
+Pretrained models are listed [here](https://zoobot.readthedocs.io/en/latest/pretrained_models.html) and available on [HuggingFace](https://huggingface.co/collections/mwalmsley/zoobot-encoders-65fa14ae92911b173712b874).
### Worked Examples
+
-PyTorch (recommended):
- [pytorch/examples/finetuning/finetune_binary_classification.py](https://github.com/mwalmsley/zoobot/blob/main/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py)
- [pytorch/examples/finetuning/finetune_counts_full_tree.py](https://github.com/mwalmsley/zoobot/blob/main/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py)
- [pytorch/examples/representations/get_representations.py](https://github.com/mwalmsley/zoobot/blob/main/zoobot/pytorch/examples/representations/get_representations.py)
@@ -114,18 +121,14 @@ PyTorch (recommended):
There is more explanation and an API reference on the [docs](https://zoobot.readthedocs.io/).
-I also [include](https://github.com/mwalmsley/zoobot/blob/main/benchmarks) the scripts used to create and benchmark our pretrained models. Many pretrained models are available [already](https://zoobot.readthedocs.io/en/latest/data_notes.html), but if you need one trained on e.g. different input image sizes or with a specific architecture, I can probably make it for you.
-
-When trained with a decision tree head (ZoobotTree, FinetuneableZoobotTree), Zoobot can learn from volunteer labels of varying confidence and predict posteriors for what the typical volunteer might say. Specifically, this Zoobot mode predicts the parameters for distributions, not simple class labels! For a demonstration of how to interpret these predictions, see the [gz_decals_data_release_analysis_demo.ipynb](https://github.com/mwalmsley/zoobot/blob/main/gz_decals_data_release_analysis_demo.ipynb).
-
-
### (Optional) Install PyTorch with CUDA
+
*If you're not using a GPU, skip this step. Use the pytorch-cpu option in the section below.*
-Install PyTorch 2.1.0 or Tensorflow 2.10.0 and compatible CUDA drivers. I highly recommend using [conda](https://docs.conda.io/en/latest/miniconda.html) to do this. Conda will handle both creating a new virtual environment (`conda create`) and installing CUDA (`cudatoolkit`, `cudnn`)
+Install PyTorch 2.1.0 and compatible CUDA drivers. I highly recommend using [conda](https://docs.conda.io/en/latest/miniconda.html) to do this. Conda will handle both creating a new virtual environment (`conda create`) and installing CUDA (`cudatoolkit`, `cudnn`)
CUDA 12.1 for PyTorch 2.1.0:
@@ -135,6 +138,7 @@ CUDA 12.1 for PyTorch 2.1.0:
### Recent release features (v2.0.0)
+- **New in 2.0.1** Add greyscale encoders. Use `hf_hub:mwalmsley/zoobot-encoder-greyscale-convnext_nano` or [similar](https://huggingface.co/collections/mwalmsley/zoobot-encoders-greyscale-66427c51133285ca01b490c6).
- New pretrained architectures: ConvNeXT, EfficientNetV2, MaxViT, and more. Each in several sizes.
- Reworked finetuning procedure. All these architectures are finetuneable through a common method.
- Reworked finetuning options. Batch norm finetuning removed. Cosine schedule option added.
@@ -152,11 +156,11 @@ Contributions are very welcome and will be credited in any future work. Please g
### Benchmarks and Replication - Training from Scratch
-The [benchmarks](https://github.com/mwalmsley/zoobot/blob/main/benchmarks) folder contains slurm and Python scripts to train Zoobot from scratch. We use these scripts to make sure new code versions work well, and that TensorFlow and PyTorch achieve similar performance.
+The [benchmarks](https://github.com/mwalmsley/zoobot/blob/main/benchmarks) folder contains slurm and Python scripts to train Zoobot 1.0 from scratch.
Training Zoobot using the GZ DECaLS dataset option will create models very similar to those used for the GZ DECaLS catalogue and shared with the early versions of this repo. The GZ DESI Zoobot model is trained on additional data (GZD-1, GZD-2), as the GZ Evo Zoobot model (GZD-1/2/5, Hubble, Candels, GZ2).
-**Pretraining is becoming increasingly complex and is now partially refactored out to a separate repository. We are gradually migrating this `zoobot` repository to focus on finetuning.**
+*Pretraining is becoming increasingly complex and is now partially refactored out to a separate repository. We are gradually migrating this `zoobot` repository to focus on finetuning.*
### Citing
@@ -174,6 +178,7 @@ You might be interested in reading papers using Zoobot:
- [Galaxy Zoo DESI: Detailed morphology measurements for 8.7M galaxies in the DESI Legacy Imaging Surveys](https://academic.oup.com/mnras/advance-article/doi/10.1093/mnras/stad2919/7283169?login=false) (2023)
- [Galaxy mergers in Subaru HSC-SSP: A deep representation learning approach for identification, and the role of environment on merger incidence](https://doi.org/10.1051/0004-6361/202346743) (2023)
- [Astronomaly at Scale: Searching for Anomalies Amongst 4 Million Galaxies](https://arxiv.org/abs/2309.08660) (2023, submitted)
-- [Transfer learning for galaxy feature detection: Finding Giant Star-forming Clumps in low redshift galaxies using Faster R-CNN](https://arxiv.org/abs/2312.03503) (2023, submitted)
+- [Transfer learning for galaxy feature detection: Finding Giant Star-forming Clumps in low redshift galaxies using Faster R-CNN](https://arxiv.org/abs/2312.03503) (2023)
+- [Euclid preparation. Measuring detailed galaxy morphologies for Euclid with Machine Learning](https://arxiv.org/abs/2402.10187) (2024, submitted)
Many other works use Zoobot indirectly via the [Galaxy Zoo DECaLS](https://arxiv.org/abs/2102.08414) catalog (and now via the new [Galaxy Zoo DESI](https://academic.oup.com/mnras/advance-article/doi/10.1093/mnras/stad2919/7283169?login=false) catalog).
diff --git a/docs/conf.py b/docs/conf.py
index 227ce95e..87c633f5 100755
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,11 +19,11 @@
# -- Project information -----------------------------------------------------
project = 'Zoobot'
-copyright = '2023, Mike Walmsley'
+copyright = '2024, Mike Walmsley'
author = 'Mike Walmsley'
# The full version, including alpha/beta/rc tags
-release = '0.0.4'
+release = '2.0'
# -- General configuration ---------------------------------------------------
@@ -33,7 +33,8 @@
# ones.
extensions = [
'sphinx.ext.autodoc', # import docs from code
- 'sphinx.ext.napoleon' # google docstrings
+ 'sphinx.ext.napoleon', # google docstrings
+ 'sphinxemoji.sphinxemoji', # emoji support https://sphinxemojicodes.readthedocs.io/en/stable/
]
# Add any paths that contain templates here, relative to this directory.
diff --git a/docs/data_notes.rst b/docs/data_notes.rst
deleted file mode 100755
index e6ce0a4f..00000000
--- a/docs/data_notes.rst
+++ /dev/null
@@ -1,127 +0,0 @@
-.. _datanotes:
-
-Pretrained Models
-=================
-
-Zoobot includes weights for the following pretrained models.
-
-.. list-table:: PyTorch Models
- :widths: 70 35 35 35 35
- :header-rows: 1
-
- * - Architecture
- - Input Size
- - Channels
- - Finetune
- - Link
- * - EfficientNetB0
- - 224px
- - 1
- - Yes
- - `Link `__
- * - EfficientNetB0
- - 300px
- - 1
- - Yes
- - `Link `__
- * - EfficientNetB0
- - 224px
- - 3
- - Yes
- - `Link `__
- * - ResNet50
- - 300px
- - 1
- - Yes
- - `Link `__
- * - ResNet50
- - 224px
- - 1
- - Yes
- - `Link `__
- * - ResNet18
- - 300px
- - 1
- - Yes
- - `Link `__
- * - ResNet18
- - 224px
- - 1
- - Yes
- - `Link `__
- * - Max-ViT Tiny
- - 224px
- - 1
- - Yes
- - `Link `__
- * - Max-ViT Tiny
- - 224px
- - 3
- - Yes
- - `Link `__
-
-
-
-.. list-table:: TensorFlow Models
- :widths: 70 35 35 35 35
- :header-rows: 1
-
- * - Architecture
- - Input Size
- - Channels
- - Finetune
- - Link
- * - EfficientNetB0
- - 300px
- - 1
- - Yes
- - `Link `__
- * - EfficientNetB0
- - 224px
- - 1
- - Yes
- - WIP
-
-
-.. note::
-
- Missing a model you need? Reach out! There's a good chance we can train any small-ish model supported by `timm `_.
-
-All models are trained on the GZ Evo dataset described in the `Towards Foundation Models paper `_.
-This dataset includes 550k galaxy images and 92M votes drawn from every major Galaxy Zoo campaign: GZ2, GZ Hubble, GZ CANDELS, and GZ DECaLS/DESI.
-
-All models are trained on the same images shown to Galaxy Zoo volunteers.
-These are typically 424 pixels across.
-The images are transformed using the galaxy-datasets default transforms (random off-center crop/zoom, flips, rotation) and then resized to the desired input size (224px or 300px) and, for 1-channel models, channel-averaged.
-
-We also include a few additional ad-hoc models `on Dropbox `_.
-
-- EfficientNetB0 models pretrained only on GZ DECaLS GZD-5. For reference/comparison.
-- EfficientNetB0 models pretrained with smaller images (128px and 64px). For debugging.
-
-
-Which model should I use?
---------------------------
-
-We suggest the PyTorch EfficientNetB0 224-pixel model for most users.
-
-Zoobot will prioritise PyTorch going forward. For more, see here.
-The TensorFlow models currently perform just as well as the PyTorch equivalents but will not benefit from any future updates.
-
-EfficientNetB0 is a small yet capable modern architecture.
-The ResNet50 models perform slightly worse than EfficientNet, but are a very common architecture and may be useful as benchmarks or as part of other frameworks (like detectron2, for segmentation).
-
-It's unclear if color information improves overall performance at predicting GZ votes.
-For CNNs, the change in performance is not significant. For ViT, it is measureable but small.
-We suggesst including color if it is expected to be important to your specific task, such as hunting green peas.
-
-Larger input images (300px vs 224px) may provide a small boost in performance at predicting GZ votes.
-However, the models require more memory and train/finetune slightly more slowly.
-You may want to start with a 224px model and experiment with "upgrading" once you're happy everything works.
-
-
-What about the images?
---------------------------
-
-You can find most of our datasets on the `galaxy-datasets repo `_.
-The datasets are self-downloading and have loading functions for both PyTorch and TensorFlow.
diff --git a/docs/guides/advanced_finetuning.rst b/docs/guides/advanced_finetuning.rst
index 6554f69c..767703c7 100644
--- a/docs/guides/advanced_finetuning.rst
+++ b/docs/guides/advanced_finetuning.rst
@@ -31,37 +31,7 @@ or, because Zoobot encoders are `timm` models, you can just directly use `timm`:
You can use it like any other `timm` model. For example, we did this to `add contrastive learning `_. Good luck!
-
-
-Subclassing FinetuneableZoobotAbstract
----------------------------------------
-
-If you'd like to finetune Zoobot on a new task that isn't classification, regression, or vote counts,
-you could instead subclass :class:`zoobot.pytorch.training.finetune.FinetuneableZoobotAbstract`.
-This lets you use our finetuning code with your own head and loss functions.
-
-Imagine there wasn't a regression version and you wanted to finetune Zoobot on a regression task. You could do:
-
-.. code-block:: python
-
-
- class FinetuneableZoobotCustomRegression(FinetuneableZoobotAbstract):
-
- def __init__(
- self,
- foo,
- **super_kwargs
- ):
-
- super().__init__(**super_kwargs)
-
- self.foo = foo
- self.loss = torch.nn.SomeCrazyLoss()
- self.head = torch.nn.Sequential(my_crazy_head)
-
- # see zoobot/pytorch/training/finetune.py for more examples and all methods required
-
-You can then finetune this new class just as with e.g. :class:`zoobot.pytorch.training.finetune.FinetuneableZoobotRegressor`.
+If you don't need to change the encoder and just want representations, see below.
Extracting Frozen Representations
@@ -71,7 +41,7 @@ Once you've finetuned to your survey, or if you're using a pretrained survey, (S
the representations can be stored as frozen vectors and used as features.
We use this at Galaxy Zoo to power our upcoming similary search and anomaly-finding tools.
-As above, we can get Zoobot's encoder from the .encoder attribute. We could use ``encoder()`` to calculate our representations.
+As above, we can get Zoobot's encoder from the .encoder attribute. We could use ``encoder.forward()`` to calculate our representations.
But then we'd have to deal with batching, looping, etc.
To avoid this boilerplate, Zoobot includes a PyTorch Lightning class that lets you pass ``encoder`` to the same :func:`zoobot.pytorch.predictions.predict_on_catalog.predict`
utility function used for making predictions with a full Zoobot model.
@@ -95,9 +65,41 @@ utility function used for making predictions with a full Zoobot model.
See `zoobot/pytorch/examples/representations `_ for a full working example.
-We are sharing precalculated representations for all our DESI galaxies, and soon for HSC as well.
-Check the data notes at :doc:/data_notes
+We have precalculated representations for all our DESI galaxies, and soon for HSC as well.
+See :doc:`/science_data`.
The representations are typically quite high-dimensional (e.g. 1280 for EfficientNetB0) and therefore highly redundant.
We suggest using PCA to compress them down to a more reasonable dimension (e.g. 40) while preserving most of the information.
This was our approach in the `Practical Morphology Tools paper `_.
+
+
+Subclassing FinetuneableZoobotAbstract
+---------------------------------------
+
+If you'd like to finetune Zoobot on a new task that isn't classification, regression, or vote counts,
+you could instead subclass :class:`zoobot.pytorch.training.finetune.FinetuneableZoobotAbstract`.
+This lets you use our finetuning code with your own head and loss functions.
+
+Imagine there wasn't a regression version and you wanted to finetune Zoobot on a regression task. You could do:
+
+.. code-block:: python
+
+
+ class FinetuneableZoobotCustomRegression(FinetuneableZoobotAbstract):
+
+ def __init__(
+ self,
+ foo,
+ **super_kwargs
+ ):
+
+ super().__init__(**super_kwargs)
+
+ self.foo = foo
+ self.loss = torch.nn.SomeCrazyLoss()
+ self.head = torch.nn.Sequential(my_crazy_head)
+
+ # see zoobot/pytorch/training/finetune.py for more examples and all methods required
+
+You can then finetune this new class just as with e.g. :class:`zoobot.pytorch.training.finetune.FinetuneableZoobotRegressor`.
+
diff --git a/docs/guides/choosing_parameters.rst b/docs/guides/choosing_parameters.rst
new file mode 100644
index 00000000..9cd4b337
--- /dev/null
+++ b/docs/guides/choosing_parameters.rst
@@ -0,0 +1,101 @@
+.. _choosing_parameters:
+
+Choosing Parameters
+=====================================
+
+All FinetuneableZoobot classes share a common set of parameters for controlling the finetuning process. These can have a big effect on performance.
+
+
+Finetuning is fast and easy to experiment with, so we recommend trying different parameters to see what works best for your dataset.
+This guide provides some explanation for each option.
+
+We list the key parameters below in rough order of importance.
+See :class:`zoobot.pytorch.training.finetune.FinetuneableZoobotAbstract` for the full list of parameters.
+
+``learning_rate``
+...............................
+
+Learning rate sets how fast the model parameters are updated during training.
+Zoobot uses the adaptive optimizer ``AdamW``.
+Adaptive optimizers adjust the learning rate for each parameter based on the mean and variance of the previous gradients.
+This means you don't need to tune the learning rate as carefully as you would with a fixed learning rate optimizer like SGD.
+We find a learning rate of 1e-4 is a good starting point for most tasks.
+
+If you find the model is not learning, you can try increasing the learning rate.
+If you see the model loss is varying wildly, or the train loss decreases much faster than the validation loss (overfitting), you can try decreasing the learning rate.
+Increasing ``n_blocks`` (below) often requires a lower learning rate, as the model will adjust more parameters for each batch.
+
+
+``n_blocks``
+...............................
+
+Deep learning models are often divided into blocks of layers.
+For example, a ResNet model might have 4 blocks, each containing a number of convolutional layers.
+The ``n_blocks`` parameter specifies how many of these blocks to finetune.
+
+By default, ``n_blocks=0``, and so only the head is finetuned.
+This is a good choice when you have a small dataset or when you want to avoid overfitting.
+Finetuning only the head is sometimes called transfer learning.
+It's equivalent to calculating representations with the pretrained model and then training a new one-layer model on top of those representations.
+
+You can experiment with increasing ``n_blocks`` to finetune more of the model.
+This works best for larger datasets (typically more than 1k examples).
+To finetune the full model, keep increasing ``n_blocks``; Zoobot will raise an error if you try to finetune more blocks than the model has.
+Our recommended encoder, ``ConvNeXT``, has 5 blocks.
+
+
+``lr_decay``
+...............................
+
+The common intuition for deep learning is that lower blocks (near the input) learn simple general features and higher blocks (near the output) learn more complex features specific to your task.
+It is often useful to adjust the learning rate to be lower for lower blocks, which have already been pretrained to recognise simple galaxy features.
+
+Learning rate decay reduces the learning rate by block.
+For example, with ``learning_rate=1e-4`` and ``lr_decay=0.75`` (the default):
+
+* The highest block has a learning rate of 1e-4 * (0.75^0) = 1e-4
+* The second-highest block has a learning rate of 1e-4 * (0.75^1) = 7.5e-5
+* The third-highest block has a learning rate of 1e-4 * (0.75^2) = 5.6e-5
+
+and so on.
+
+Decreasing ``lr_decay`` will exponentially decrease the learning rate for lower blocks.
+
+In the extreme cases:
+
+* Setting ``lr_decay=0`` will disable learning in all blocks except the first block (0^0=1), equivalent to ``n_blocks=1``.
+* Setting ``lr_decay=1`` will give all blocks the same learning rate.
+
+The head always uses the full learning rate.
+
+``weight_decay``
+...............................
+
+Weight decay is a regularization term that penalizes large weights.
+When using Zoobot's default ``AdamW`` optimizer, it is closely related to L2 regularization, though there's some subtlety - see https://arxiv.org/abs/1711.05101.
+Increasing weight decay will increase the penalty on large weights, which can help prevent overfitting, but may slow or even stop training.
+By default, Zoobot uses a small weight decay of 0.05.
+
+
+``dropout_prob``
+...............................
+
+Dropout is a regularization technique that randomly sets some activations to zero during training.
+Similarly to weight decay, dropout can help prevent overfitting.
+Zoobot uses a dropout probability of 0.5 by default.
+
+
+``cosine_schedule`` and friends
+.................................
+
+Gradually reducing the learning rate during training can slightly improve results by finding a better minimum near convergence.
+This process is called learning rate scheduling.
+Zoobot includes a cosine learning rate schedule, which reduces the learning rate according to a cosine function.
+
+The cosine schedule is controlled by the following parameters:
+
+* ``cosine_schedule`` to enable the scheduler.
+* ``warmup_epochs`` to linearly increase the learning rate from 0 to ``learning_rate`` over the first ``warmup_epochs`` epochs, before applying cosine scheduling.
+* ``max_cosine_epochs`` sets how many epochs it takes to decay to the final learning rate (below). Warmup epochs don't count.
+* ``max_learning_rate_reduction_factor`` controls the final learning rate (``learning_rate`` * ``max_learning_rate_reduction_factor``).
+
\ No newline at end of file
diff --git a/docs/guides/finetuning.rst b/docs/guides/finetuning.rst
index d46eee1b..bce4fb56 100755
--- a/docs/guides/finetuning.rst
+++ b/docs/guides/finetuning.rst
@@ -30,12 +30,10 @@ Examples
Zoobot includes many working examples of finetuning:
-- `Google Colab notebook `__ (for binary classification in the cloud)
+- `Google Colab notebook `__ (recommended starting point)
- `finetune_binary_classification.py `__ (script version of the Colab notebook)
- `finetune_counts_full_tree.py `__ (for finetuning on a complicated GZ-style decision tree)
-There are also `examples `__ with the TensorFlow version of Zoobot. But this is no longer actively developed so we strongly suggest using the PyTorch version if possible.
-
Below, for less familiar readers, we walk through the ``finetune_binary_classification.py`` example in detail.
Background
@@ -60,12 +58,12 @@ These files are called checkpoints (like video game save files - computer scient
.. code-block:: python
model = finetune.FinetuneableZoobotClassifier(
- checkpoint_loc=checkpoint_loc, # loads weights from here
+ name='hf_hub:mwalmsley/zoobot-encoder-convnext_nano', # which pretrained model to download
num_classes=2,
n_layers=0
)
-You can download a checkpoint file from :ref:`datanotes`.
+You can see the list of pretrained models at :doc:`/pretrained_models`.
What about the other arguments?
When loading the checkpoint, FinetuneableZoobotClassifier will automatically change the head layer to suit a classification problem (hence, ``Classifier``).
diff --git a/docs/guides/guides.rst b/docs/guides/guides.rst
deleted file mode 100755
index 1de9e932..00000000
--- a/docs/guides/guides.rst
+++ /dev/null
@@ -1,16 +0,0 @@
-
-Guides
-======
-
-Below are some practical guides for tasks that we hope Zoobot will be helpful for.
-
-.. toctree::
- :maxdepth: 2
-
- /guides/finetuning
- /guides/advanced_finetuning
- /guides/how_the_code_fits_together
- /guides/loading_data
- /guides/training_on_vote_counts
-
-If you'd prefer worked examples, you can find those under `zoobot/pytorch/examples `_.
diff --git a/docs/guides/how_the_code_fits_together.rst b/docs/guides/how_the_code_fits_together.rst
index 9c816ad5..437bcbc7 100644
--- a/docs/guides/how_the_code_fits_together.rst
+++ b/docs/guides/how_the_code_fits_together.rst
@@ -10,6 +10,7 @@ The Map
-------------------------
The Zoobot package has two roles:
+
1. **Finetuning**: ``pytorch/training/finetune.py`` is the heart of the package. You will use these classes to load pretrained models and finetune them on new data.
2. **Training from Scratch** ``pytorch/estimators/define_model.py`` and ``pytorch/training/train_with_pytorch_lightning.py`` create and train the Zoobot models from scratch. These are *not required* for finetuning and will eventually be migrated out.
@@ -20,6 +21,7 @@ Finetuning with Zoobot Classes
There are three Zoobot classes for finetuning:
+
1. :class:`FinetuneableZoobotClassifier ` for classification tasks (including multi-class).
2. :class:`FinetuneableZoobotRegressor ` for regression tasks (including on a unit interval e.g. a fraction).
3. :class:`FinetuneableZoobotTree ` for training on a tree of labels (e.g. Galaxy Zoo vote counts).
diff --git a/docs/index.rst b/docs/index.rst
index 64fbcac4..4f062cf3 100755
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -24,12 +24,23 @@ For more explanation, read on.
User Guides
-------------
-We've written these guides to add explanation and context.
+These introductory guides add context to the demo Colab notebooks.
+
+.. toctree::
+ :maxdepth: 1
+
+ /guides/finetuning
+ /guides/choosing_parameters
+ /guides/loading_data
+ /guides/training_on_vote_counts
+
+These advanced guides explain how to integrate Zoobot into other ML projects.
.. toctree::
:maxdepth: 2
- /guides/guides
+ /guides/advanced_finetuning
+ /guides/how_the_code_fits_together
Pretrained Models
------------------
@@ -39,7 +50,21 @@ To choose and download a pretrained model, see here.
.. toctree::
:maxdepth: 2
- data_notes
+ pretrained_models
+
+
+Science-Ready Data
+------------------
+
+You can find our science outputs (e.g. morphology catalogs, precalculated representations) here.
+
+.. toctree::
+ :maxdepth: 2
+
+ science_data
+
+We are working on releasing the compiled GZ Evo dataset and will update this page when it is available.
+Estimated public release is Q4 2024. Please reach out if you'd like early access.
API reference
diff --git a/docs/pretrained_models.rst b/docs/pretrained_models.rst
new file mode 100755
index 00000000..b590e168
--- /dev/null
+++ b/docs/pretrained_models.rst
@@ -0,0 +1,145 @@
+.. _pretrainedmodels:
+
+Pretrained Models
+------------------
+
+Loading Models
+==========================
+
+Pretrained models are available via HuggingFace (|:hugging:|) with
+
+.. code-block:: python
+
+ from zoobot.pytorch.training.finetune import FinetuneableZoobotClassifier
+ # or FinetuneableZoobotRegressor, or FinetuneableZoobotTree
+
+ model = FinetuneableZoobotClassifier(name='hf_hub:mwalmsley/zoobot-encoder-convnext_nano')
+
+For more options (e.g. loading the ``timm`` encoder directly) see :doc:`guides/advanced_finetuning`.
+
+Available Models
+==========================
+
+Zoobot includes weights for the following pretrained models:
+
+
+.. list-table::
+ :widths: 70 35 35 35 35
+ :header-rows: 1
+
+ * - Architecture
+ - Parameters
+ - Test loss
+ - Finetune
+ - HF |:hugging:|
+ * - ConvNeXT-Pico
+ - 9.1M
+ - 19.33
+ - Yes
+ - `Link `__
+ * - ConvNeXT-Nano
+ - 15.6M
+ - 19.23
+ - Yes
+ - `Link `__
+ * - ConvNeXT-Tiny
+ - 44.6M
+ - 19.08
+ - Yes
+ - `Link `__
+ * - ConvNeXT-Small
+ - 58.5M
+ - 19.06
+ - Yes
+ - `Link `__
+ * - ConvNeXT-Base
+ - 88.6M
+ - **19.05**
+ - Yes
+ - `Link `__
+ * - ConvNeXT-Large
+ - 197.8M
+ - 19.09
+ - Yes
+ - `Link `__
+ * - MaxViT-Tiny
+ - 29.1M
+ - 19.22
+ - Yes
+ - `Link `__
+ * - MaxViT-Small
+ - 64.9M
+ - 19.20
+ - Yes
+ - `Link `__
+ * - MaxViT-Base
+ - 124.5M
+ - 19.09
+ - Yes
+ - `Link `__
+ * - MaxViT-Large
+ - 211.8M
+ - 19.18
+ - Yes
+ - `Link `__
+ * - EfficientNetB0
+ - 5.33M
+ - 19.48
+ - Yes
+ - WIP
+ * - EfficientNetV2-S
+ - 48.3M
+ - 19.33
+ - Yes
+ - WIP
+ * - ResNet18
+ - 11.7M
+ - 19.83
+ - Yes
+ - `Link `__
+ * - ResNet50
+ - 25.6M
+ - 19.43
+ - Yes
+ - `Link `__
+ * - ResNet101
+ - 44.5M
+ - 19.37
+ - Yes
+ - `Link `__
+
+
+.. note::
+
+ Missing a model you need? Reach out! There's a good chance we can train any model supported by `timm `_.
+
+.. note::
+
+ New in Zoobot v2.0.1: greyscale (single channel) versions are available `here `_.
+
+Which model should I use?
+===========================
+
+We suggest starting with ConvNeXT-Nano for most users.
+ConvNeXT-Nano performs very well while still being small enough to train on a single gaming GPU.
+You will be able to experiment quickly.
+
+For maximum performance, you could swap ConvNeXT-Nano for ConvNeXT-Small or ConvNeXT-Base.
+MaxViT-Base also performs well and includes an ingenious attention mechanism, if you're interested in that.
+All these models are much larger and need cluster-grade GPUs (e.g. V100 or above).
+
+Other models are included for reference or as benchmarks.
+EfficientNetB0 is equivalent to the model used in the GZ DECaLS and GZ DESI papers.
+ResNet18 and ResNet50 are classics of the genre and may be useful for comparison or as part of other frameworks (e.g. as an `object detection backbone `_).
+
+
+How were the models trained?
+===============================
+
+The models were trained as part of the report `Scaling Laws for Galaxy Images `_.
+This report systematically investigates how increasing labelled galaxy data and model size improves performance
+and leads to adaptable models that generalise well to new tasks and new telescopes.
+
+All models are trained on the GZ Evo dataset,
+which includes 820k images and 100M+ volunteer votes drawn from every major Galaxy Zoo campaign: GZ2, GZ UKIDSS (unpublished), GZ Hubble, GZ CANDELS, GZ DECaLS/DESI, and GZ Cosmic Dawn (HSC, in prep.).
+They learn an adaptable representation of galaxy images by training to answer every Galaxy Zoo question at once.
diff --git a/docs/science_data.rst b/docs/science_data.rst
new file mode 100644
index 00000000..569a3110
--- /dev/null
+++ b/docs/science_data.rst
@@ -0,0 +1,59 @@
+.. _sciencedata:
+
+Science Data
+-------------
+
+The goal of Zoobot is to do science. Here are some science-ready datasets created with Zoobot.
+
+Precalculated Representations
+==============================
+
+.. warning::
+
+ New for Zoobot v2! We're really excited to see what you build. Reach out for help.
+
+Zoobot v2 now includes precalculated representations for galaxies in the Galaxy Zoo DESI data release.
+Download `here `_ (2.5GB)
+
+You could use these to power a similarity search, anomaly recommendation system, the vision part of a multi-modal model,
+or really anything else that needs a short vector summarizing the morphology in a galaxy image.
+
+
+
+
+.. list-table::
+ :widths: 35 35 35 35 35 35
+ :header-rows: 1
+
+ * - id_str
+ - ra
+ - dec
+ - feat_pca_0
+ - feat_pca_1
+ - ...
+ * - 303240_2499
+ - 4.021870
+ - 3.512972
+ - 0.257407
+ - -7.414328
+ - ...
+
+``id_str`` is the unique identifier for the galaxy in the DESI Legacy Surveys DR8 release and can be crossmatched with the GZ DESI catalog (below) ``dr8_id`` key.
+It is formed with ``{brickid}_{objid}`` where brickid is the unique identifier for the brick in the Legacy Surveys and objid is the unique identifier for the object in the brick.
+``ra`` and ``dec`` are in degrees.
+The PCA features are the first 40 principal components of the representation (which is otherwise impractically large to work with).
+
+
+Galaxy Zoo Morphology
+=======================
+
+Zoobot was used to create a detailed morphology catalog for every (extended, brighter than r=19) galaxy in the DESI Legacy Surveys (8.7M galaxies).
+The catalog and schema are available from `Zenodo `_.
+For new users, we suggest starting with the ``gz_desi_deep_learning_catalog_friendly.parquet`` catalog file.
+
+We previously used Zoobot to create a similar catalog for `DECaLS DR5 `_.
+This has now been superseded by the GZ DESI catalog above (which includes the same galaxies, and many more).
+
+We aim to provide both representations and an updated morphology catalog for DESI-LS DR10, but we need to redownload all the images first |:neutral_face:|.
+
+Future catalogs will include morphology measurements for HSC, JWST, and Euclid galaxies (likely in that order).
diff --git a/setup.py b/setup.py
index 9c50ad2a..0faa3772 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
setuptools.setup(
name="zoobot",
- version="2.0.0",
+ version="2.0.1",
author="Mike Walmsley",
author_email="walmsleymk1@gmail.com",
description="Galaxy morphology classifiers",
@@ -97,7 +97,8 @@
'Sphinx',
'sphinxcontrib-napoleon',
'furo',
- 'docutils<0.18'
+ 'docutils<0.18',
+ 'sphinxemoji'
]
},
install_requires=[
@@ -116,6 +117,6 @@
'webdataset', # for reading webdataset files
'huggingface_hub', # login may be required
'setuptools', # no longer pinned
- 'galaxy-datasets>=0.0.17' # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets)
+ 'galaxy-datasets>=0.0.18' # for dataset loading in both TF and Torch (see github/mwalmsley/galaxy-datasets)
]
)
diff --git a/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py b/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py
index c5309e8b..4cf7efff 100644
--- a/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py
+++ b/zoobot/pytorch/examples/finetuning/finetune_binary_classification.py
@@ -26,11 +26,6 @@
# For binary classification, the label column should have binary (0 or 1) labels for your classes
# To support more complicated labels, Zoobot expects a list of columns. A list with one element works fine.
- # load a pretrained checkpoint saved here
- # https://www.dropbox.com/s/7ixwo59imjfz4ay/effnetb0_greyscale_224px.ckpt?dl=0
- # see https://zoobot.readthedocs.io/en/latest/data_notes.html for more options
- checkpoint_loc = os.path.join(zoobot_dir, 'data/pretrained_models/pytorch/effnetb0_greyscale_224px.ckpt')
-
# save the finetuning results here
save_dir = os.path.join(zoobot_dir, 'results/pytorch/finetune/finetune_binary_classification')
@@ -47,7 +42,7 @@
model = finetune.FinetuneableZoobotClassifier(
- checkpoint_loc=checkpoint_loc,
+ name='hf_hub:mwalmsley/zoobot-encoder-convnext_nano',
num_classes=2,
n_layers=0 # only updating the head weights. Set e.g. 1, 2 to finetune deeper.
)
diff --git a/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py b/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py
index bca84a90..6ed2a231 100644
--- a/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py
+++ b/zoobot/pytorch/examples/finetuning/finetune_counts_full_tree.py
@@ -10,6 +10,7 @@
from zoobot.pytorch.training import finetune
from zoobot.pytorch.predictions import predict_on_catalog
from zoobot.shared.schemas import gz_candels_ortho_schema
+from zoobot.shared.load_predictions import prediction_hdf5_to_summary_parquet
"""
Example for finetuning Zoobot on counts of volunteer responses throughout a complex decision tree (here, GZ CANDELS).
@@ -67,12 +68,12 @@
resize_after_crop=resize_after_crop
)
- checkpoint_loc = os.path.join(
- # TODO replace with path to downloaded checkpoints. See Zoobot README for download links.
- repo_dir, 'gz-decals-classifiers/results/benchmarks/pytorch/evo/uploaded/effnetb0_greyscale_224px.ckpt') # decals hparams
-
- model = finetune.FinetuneableZoobotTree(checkpoint_loc=checkpoint_loc, schema=schema)
-
+ model = finetune.FinetuneableZoobotTree(
+ name='hf_hub:mwalmsley/zoobot-encoder-convnext_nano',
+ schema=schema
+ )
+
+ # TODO set this to wherever you'd like to save your results
save_dir = os.path.join(
repo_dir, f'gz-decals-classifiers/results/finetune_{np.random.randint(1e8)}')
@@ -86,12 +87,16 @@
# now save predictions on test set to evaluate performance
datamodule_kwargs = {'batch_size': batch_size, 'resize_after_crop': resize_after_crop}
trainer_kwargs = {'devices': 1, 'accelerator': accelerator}
+
+ hdf5_loc = os.path.join(save_dir, 'test_predictions.hdf5')
predict_on_catalog.predict(
test_catalog,
model,
n_samples=1,
label_cols=schema.label_cols,
- save_loc=os.path.join(save_dir, 'test_predictions.csv'),
+ save_loc=hdf5_loc,
datamodule_kwargs=datamodule_kwargs,
trainer_kwargs=trainer_kwargs
)
+
+ prediction_hdf5_to_summary_parquet(hdf5_loc=hdf5_loc, save_loc=hdf5_loc.replace('.hdf5', '_summary.parquet'), schema=schema)  # summary is written next to the hdf5 as test_predictions_summary.parquet
\ No newline at end of file
diff --git a/zoobot/pytorch/examples/representations/get_representations.py b/zoobot/pytorch/examples/representations/get_representations.py
index dc154485..d83974dc 100644
--- a/zoobot/pytorch/examples/representations/get_representations.py
+++ b/zoobot/pytorch/examples/representations/get_representations.py
@@ -1,32 +1,45 @@
import logging
import os
+import timm
+
from galaxy_datasets import demo_rings
from zoobot.pytorch.training import finetune, representations
from zoobot.pytorch.estimators import define_model
from zoobot.pytorch.predictions import predict_on_catalog
+from zoobot.pytorch.training import finetune
from zoobot.shared import load_predictions, schemas
-def main(catalog, checkpoint_loc, save_dir):
+def main(catalog, save_dir, name="hf_hub:mwalmsley/zoobot-encoder-convnext_nano"):
assert all([os.path.isfile(x) for x in catalog['file_loc']])
if not os.path.exists(save_dir):
os.mkdir(save_dir)
- # can load from either ZoobotTree checkpoint (if trained from scratch)
- encoder = define_model.ZoobotTree.load_from_checkpoint(checkpoint_loc).encoder
- # or FinetuneableZoobotTree (if finetuned)
- # currently, FinetuneableZoobotTree checkpoints should be loaded as ZoobotTree with the args below
- # this is a bit awkward and I'm working on a clearer method - but it does work.
- # encoder = define_model.ZoobotTree.load_from_checkpoint(checkpoint_loc, output_dim=TODO, question_index_groups=[]).encoder
+ # load the encoder
+
+ # OPTION 1
+ # Load a pretrained model from HuggingFace, with no finetuning, only as published
+ model = representations.ZoobotEncoder.load_from_name(name)
+ # or equivalently (the above is just a wrapper for these two lines below)
+ # encoder = timm.create_model(model_name=name, pretrained=True)
+ # model = representations.ZoobotEncoder(encoder=encoder)
- # convert to simple pytorch lightning model
- model = representations.ZoobotEncoder(encoder=encoder, pyramid=False)
+ """
+ # OPTION 2
- label_cols = [f'feat_{n}' for n in range(1280)]
+ # Load a model that has been finetuned on your own data
+ # (...do your usual finetuning..., or load a finetuned model with finetune.FinetuneableZoobotClassifier(checkpoint_loc=....ckpt)
+ encoder = finetuned_model.encoder
+ # and then convert to simple pytorch lightning model. You can use any pytorch model here.
+ model = representations.ZoobotEncoder(encoder=encoder)
+ """
+
+ encoder_dim = define_model.get_encoder_dim(model.encoder)
+ label_cols = [f'feat_{n}' for n in range(encoder_dim)]
save_loc = os.path.join(save_dir, 'representations.hdf5')
accelerator = 'cpu' # or 'gpu' if available
@@ -52,20 +65,17 @@ def main(catalog, checkpoint_loc, save_dir):
logging.basicConfig(level=logging.INFO)
- # load the gz evo model for representations
- checkpoint_loc = '/home/walml/repos/gz-decals-classifiers/results/benchmarks/pytorch/evo/evo_py_gr_11941/checkpoints/epoch=73-step=42698.ckpt'
-
# use this demo dataset
# TODO change this to wherever you'd like, it will auto-download
- data_dir = '/home/walml/repos/galaxy-datasets/roots/demo_rings'
+ data_dir = '/Users/user/repos/galaxy-datasets/roots/demo_rings'
catalog, _ = demo_rings(root=data_dir, download=True, train=True)
print(catalog.head())
# zoobot expects id_str and file_loc columns, so add these if needed
# save the representations here
# TODO change this to wherever you'd like
- save_dir = os.path.join('/home/walml/repos/zoobot/results/pytorch/representations/example')
+ save_dir = os.path.join('/Users/user/repos/zoobot/results/pytorch/representations/example')
- representations_loc = main(catalog, checkpoint_loc, save_dir)
+ representations_loc = main(catalog, save_dir)
rep_df = load_predictions.single_forward_pass_hdf5s_to_df(representations_loc)
print(rep_df)
diff --git a/zoobot/pytorch/training/finetune.py b/zoobot/pytorch/training/finetune.py
index 29c0b334..405594a2 100644
--- a/zoobot/pytorch/training/finetune.py
+++ b/zoobot/pytorch/training/finetune.py
@@ -62,13 +62,14 @@ class FinetuneableZoobotAbstract(pl.LightningModule):
dropout_prob (float, optional): P of dropout before final output layer. Defaults to 0.5.
always_train_batchnorm (bool, optional): Temporarily deprecated. Previously, if True, do not update batchnorm stats during finetuning. Defaults to True.
cosine_schedule (bool, optional): Reduce the learning rate each epoch according to a cosine schedule, after warmup_epochs. Defaults to False.
- warmup_epochs (int, optional): Linearly increase the learning rate from 0 to `learning_rate` over the first `warmup_epochs` epochs, before applying cosine schedule. No effect if cosine_schedule=False.
- max_cosine_epochs (int, optional): Epochs for the scheduled learning rate to decay to final learning rate (below). Warmup epochs don't count. No effect if `cosine_schedule=False`.
- max_learning_rate_reduction_factor (float, optional): Set final learning rate as `learning_rate` * `max_learning_rate_reduction_factor`. No effect if `cosine_schedule=False`.
- from_scratch (bool, optional): Ignore all settings above and train from scratch at `learning_rate` for all layers. Useful for a quick baseline. Defaults to False.
+ warmup_epochs (int, optional): Linearly increase the learning rate from 0 to ``learning_rate`` over the first ``warmup_epochs`` epochs, before applying cosine schedule. No effect if cosine_schedule=False.
+ max_cosine_epochs (int, optional): Epochs for the scheduled learning rate to decay to final learning rate (below). Warmup epochs don't count. No effect if ``cosine_schedule=False``.
+ max_learning_rate_reduction_factor (float, optional): Set final learning rate as ``learning_rate`` * ``max_learning_rate_reduction_factor``. No effect if ``cosine_schedule=False``.
+ from_scratch (bool, optional): Ignore all settings above and train from scratch at ``learning_rate`` for all layers. Useful for a quick baseline. Defaults to False.
prog_bar (bool, optional): Print progress bar during finetuning. Defaults to True.
visualize_images (bool, optional): Upload example images to WandB. Good for debugging but slow. Defaults to False.
seed (int, optional): random seed to use. Defaults to 42.
+ n_layers: No effect, deprecated. Use n_blocks instead.
"""
def __init__(
@@ -118,8 +119,15 @@ def __init__(
# FinetuneableZoobotTree.load_from_checkpoint(loc, encoder=encoder)
if name is not None:
- assert encoder is None, "Cannot pass both name and encoder to use"
- self.encoder = timm.create_model(name, num_classes=0, pretrained=True)
+ assert encoder is None, 'Cannot pass both name and encoder to use'
+ if 'greyscale' in name:
+ # I'm not sure why timm is happy to convert color model stem to greyscale
+ # but doesn't correctly load greyscale model without this hack
+ logging.info('Loading greyscale model (auto-detected from name)')
+ timm_kwargs = {'in_chans': 1}
+ else:
+ timm_kwargs = {}
+ self.encoder = timm.create_model(name, num_classes=0, pretrained=True, **timm_kwargs)
self.encoder_dim = self.encoder.num_features
elif zoobot_checkpoint_loc is not None:
@@ -408,7 +416,7 @@ def upload_images_to_wandb(self, outputs, batch, batch_idx):
@classmethod
def load_from_name(cls, name: str, **kwargs):
- downloaded_loc = download_from_name(cls.__name__, name, **kwargs)
+ downloaded_loc = download_from_name(cls.__name__, name)
return cls.load_from_checkpoint(
downloaded_loc, **kwargs
) # trained on GPU, may need map_location='cpu' if you get a device error
diff --git a/zoobot/shared/schemas.py b/zoobot/shared/schemas.py
index 3f85dbbe..b0123fc3 100755
--- a/zoobot/shared/schemas.py
+++ b/zoobot/shared/schemas.py
@@ -299,3 +299,6 @@ def answers(self):
gz_ukidss_schema = Schema(label_metadata.ukidss_ortho_pairs, label_metadata.ukidss_ortho_dependencies)
gz_jwst_schema = Schema(label_metadata.jwst_ortho_pairs, label_metadata.jwst_ortho_dependencies)
+
+euclid_ortho_schema = Schema(label_metadata.euclid_ortho_pairs , label_metadata.euclid_ortho_dependencies)
+euclid_schema = Schema(label_metadata.euclid_pairs , label_metadata.euclid_dependencies)