diff --git a/.gitignore b/.gitignore
index aedc8d7c..ac4340c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -81,3 +81,4 @@ venv/
 
 # written by setuptools_scm
 **/_version.py
+benchmarks/results/*
diff --git a/MANIFEST.in b/MANIFEST.in
index 34cf45e6..27538bc8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -9,3 +9,8 @@ recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
 recursive-exclude docs *
 recursive-exclude tests *
+
+include *.json
+recursive-include benchmarks *.json
+recursive-include benchmarks *.py
+recursive-exclude benchmarks/results *
diff --git a/asv.conf.json b/asv.conf.json
new file mode 100644
index 00000000..a053875b
--- /dev/null
+++ b/asv.conf.json
@@ -0,0 +1,194 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "brainglobe_workflows",
+
+    // The project's homepage
+    "project_url": "https://github.com/brainglobe/brainglobe-workflows",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": ".",
+
+    // The Python project's subdirectory in your repo.  If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository.
+    // "repo_subdir": "",
+
+    // Customizable commands for building the project.
+    // See asv.conf.json documentation.
+    // Build the package from pyproject.toml (PEP 518) with the commands below.
+    "build_command": [
+        "python -m pip install build",
+        "python -m build",
+        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    ],
+    // To build the package using setuptools and a setup.py file, uncomment the following lines
+    // "build_command": [
+    //     "python setup.py build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+
+    // Customizable commands for installing and uninstalling the project.
+    // See asv.conf.json documentation.
+    "install_command": ["in-dir={env_dir} python -mpip install --force-reinstall {wheel_file}"],
+    "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["smg/cellfinder-cli-benchmark"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    // "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv", "mamba" (above 3.8)
+    // or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "conda",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/brainglobe/brainglobe-workflows/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.10"],
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+    "conda_channels": ["conda-forge", "defaults"],
+
+    // A conda environment file that is used for environment creation.
+    // "conda_environment_file": "environment.yml",
+
+    // The matrix of dependencies to test.  Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers.  An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPi, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments.  A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    // "matrix": {
+    //     "req": {
+    //         "numpy": ["1.6", "1.7"],
+    //         "six": ["", null],  // test with and without six installed
+    //         "pip+emcee": [""]   // emcee is only available for install with pip.
+    //     },
+    //     "env": {"ENV_VAR_1": ["val1", "val2"]},
+    //     "env_nobuild": {"ENV_VAR_2": ["val3", null]},
+    // },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    // "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    "env_dir": ".asv/env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": "benchmarks/results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": "benchmarks/html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // },
+}
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/benchmarks/cellfinder.py b/benchmarks/cellfinder.py
new file mode 100644
index 00000000..76d364bc
--- /dev/null
+++ b/benchmarks/cellfinder.py
@@ -0,0 +1,227 @@
+import json
+import shutil
+from pathlib import Path
+
+import pooch
+from brainglobe_utils.IO.cells import save_cells
+from cellfinder_core.main import main as cellfinder_run
+from cellfinder_core.tools.IO import read_with_dask
+
+from brainglobe_workflows.cellfinder.cellfinder_main import (
+    DEFAULT_JSON_CONFIG_PATH,
+    CellfinderConfig,
+    run_workflow_from_cellfinder_run,
+)
+from brainglobe_workflows.cellfinder.cellfinder_main import (
+    setup as setup_cellfinder_workflow,
+)
+
+
+class TimeBenchmarkPrepGIN:
+    """
+
+    A base class for timing benchmarks for the cellfinder workflow.
+
+    It includes:
+    - a setup_cache function that downloads the GIN data specified in the
+      default_config.json to a local directory (created by asv). This function
+      runs only once before all repeats of the benchmark.
+    - a setup function, that runs the setup steps for the workflow.
+    - a teardown function, that removes the output directory.
+
+    Notes
+    -----
+    The class includes some predefined attributes for timing benchmarks. For
+    the full list see
+    https://asv.readthedocs.io/en/stable/benchmarks.html#benchmark-attributes
+
+    Some asv benchmarking nomenclature:
+    - repeat: a benchmark repeat is made up of the following steps:
+      1- the `setup` is run,
+      2- then the timed benchmark routine is called for `n` iterations, and
+      3- finally the `teardown` function is run.
+      Each repeat generates a sample, which is the average time that the
+      routine took across all iterations. A new process is started for each
+      repeat of each benchmark. A calibration phase before running the repeat
+      computes the number of iterations that will be executed. Each benchmark
+      is run for a number of repeats. The setup_cache function is run only once
+      for all repeats of a benchmark (but it is discarded before the next
+      benchmark). By default `repeat` is set to 0, which means:
+        - if rounds==1 the default is
+            (min_repeat, max_repeat, max_time) = (1, 10, 20.0),
+        - if rounds != 1 the default is
+            (min_repeat, max_repeat, max_time) = (1, 5, 10.0)
+
+    - iterations (`number`): the number of iterations in each sample. Note that
+      `setup` and `teardown` are not run between iterations. asv will
+      automatically select the number of iterations so that each sample takes
+      approximately `sample_time` seconds.
+
+    - round: at each round, each benchmark is run for the specified number of
+      repeats. The idea is that we sample each benchmark over longer periods of
+      background performance variations.
+
+    - warmup time: asv will spend this time (in seconds) in calling the
+      benchmarked function repeatedly, before starting to run the actual
+      benchmark. If not specified, warmup_time defaults to 0.1 seconds
+
+    """
+
+    # Timing attributes
+    timeout = 600  # default: 60 s
+    version = (
+        None  # benchmark version. Default:None (i.e. hash of source code)
+    )
+    warmup_time = 0.1  # seconds
+    rounds = 2
+    repeat = 0
+    sample_time = 0.01  # default: 10 ms = 0.01 s;
+    min_run_count = 2  # default:2
+
+    # Custom attributes
+    input_config_path = str(DEFAULT_JSON_CONFIG_PATH)
+
+    def setup_cache(
+        self,
+    ):
+        """
+        Download the input data from the GIN repository to the local
+        directory specified in the default_config.json
+
+        Notes
+        -----
+        The `setup_cache` method only performs the computations once
+        per benchmark round and then caches the result to disk [1]_. It cannot
+        be parametrised [2]_.
+
+
+        [1] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#setup-and-teardown-functions
+        [2] https://asv.readthedocs.io/en/latest/writing_benchmarks.html#parameterized-benchmarks
+        """
+
+        # Check config file exists
+        assert Path(self.input_config_path).exists()
+
+        # Instantiate a CellfinderConfig from the input json file
+        # (assumes config is json serializable)
+        with open(self.input_config_path) as cfg:
+            config_dict = json.load(cfg)
+        config = CellfinderConfig(**config_dict)
+
+        # Download data with pooch
+        _ = pooch.retrieve(
+            url=config.data_url,
+            known_hash=config.data_hash,
+            path=config.install_path,
+            progressbar=True,
+            processor=pooch.Unzip(extract_dir=config.extract_dir_relative),
+        )
+
+        # Check that the input data paths in the config now exist
+        assert Path(config.signal_dir_path).exists()
+        assert Path(config.background_dir_path).exists()
+
+    def setup(self):
+        """
+        Run the cellfinder workflow setup steps.
+
+        The command line input arguments are injected as dependencies.
+        """
+
+        # Run setup
+        cfg = setup_cellfinder_workflow(
+            [
+                "--config",
+                self.input_config_path,
+            ]
+        )
+
+        # Save configuration as attribute
+        self.cfg = cfg
+
+    def teardown(self):
+        """
+        Remove the cellfinder output directory.
+
+        The input data is kept for all repeats of the same benchmark,
+        to avoid repeated downloads from GIN.
+        """
+        shutil.rmtree(Path(self.cfg.output_path).resolve())
+
+
+class TimeFullWorkflow(TimeBenchmarkPrepGIN):
+    """
+    Time the full cellfinder workflow.
+
+    It includes reading the signal and background arrays with dask,
+    detecting the cells and saving the results to an XML file
+
+    Parameters
+    ----------
+    TimeBenchmarkPrepGIN : class
+        A base class for timing benchmarks for the cellfinder workflow.
+    """
+
+    def time_workflow_from_cellfinder_run(self):
+        run_workflow_from_cellfinder_run(self.cfg)
+
+
+class TimeReadInputDask(TimeBenchmarkPrepGIN):
+    """
+    Time reading the input data with dask
+
+    Parameters
+    ----------
+    TimeBenchmarkPrepGIN : class
+        A base class for timing benchmarks for the cellfinder workflow.
+    """
+
+    def time_read_signal_with_dask(self):
+        read_with_dask(self.cfg.signal_dir_path)
+
+    def time_read_background_with_dask(self):
+        read_with_dask(self.cfg.background_dir_path)
+
+
+class TimeDetectCells(TimeBenchmarkPrepGIN):
+    """
+    Time the cell detection main pipeline (`cellfinder_run`)
+
+    Parameters
+    ----------
+    TimeBenchmarkPrepGIN : class
+        A base class for timing benchmarks for the cellfinder workflow.
+    """
+
+    # extend basic setup function
+    def setup(self):
+        # basic setup
+        TimeBenchmarkPrepGIN.setup(self)
+
+        # read the input data and store the arrays as attributes
+        self.signal_array = read_with_dask(self.cfg.signal_dir_path)
+        self.background_array = read_with_dask(self.cfg.background_dir_path)
+
+    def time_cellfinder_run(self):
+        cellfinder_run(
+            self.signal_array, self.background_array, self.cfg.voxel_sizes
+        )
+
+
+class TimeSaveCells(TimeBenchmarkPrepGIN):
+    # extend basic setup function
+    def setup(self):
+        # basic setup
+        TimeBenchmarkPrepGIN.setup(self)
+
+        # read the input data and store the arrays as attributes
+        self.signal_array = read_with_dask(self.cfg.signal_dir_path)
+        self.background_array = read_with_dask(self.cfg.background_dir_path)
+
+        # detect cells
+        self.detected_cells = cellfinder_run(
+            self.signal_array, self.background_array, self.cfg.voxel_sizes
+        )
+
+    def time_save_cells(self):
+        save_cells(self.detected_cells, self.cfg.detected_cells_path)
diff --git a/brainglobe_workflows/cellfinder/cellfinder_main.py b/brainglobe_workflows/cellfinder/cellfinder_main.py
index 04ec7663..fd19db34 100644
--- a/brainglobe_workflows/cellfinder/cellfinder_main.py
+++ b/brainglobe_workflows/cellfinder/cellfinder_main.py
@@ -96,33 +96,272 @@ class CellfinderConfig:
     detected_cells_path: Pathlike = ""
 
 
-def setup_logger() -> logging.Logger:
-    """Setup a logger for this script
+def setup(argv=None) -> CellfinderConfig:
+    def parse_cli_arguments(argv_) -> argparse.Namespace:
+        """Define argument parser for cellfinder
+        workflow script.
+
+        It expects a path to a json file with the
+        parameters required to run the workflow.
+        If none is provided, the default config path is used.
+
+        Returns
+        -------
+        args : argparse.Namespace
+            command line input arguments parsed
+        """
+        # initialise argument parser
+        parser = argparse.ArgumentParser(
+            description=(
+                "To launch the workflow with "
+                "a specific set of input parameters, run: "
+                "`python cellfinder_main.py --config path/to/config.json`"
+                "where path/to/input/config.json is the json file "
+                "containing the workflow parameters."
+            )
+        )
+        # add arguments
+        parser.add_argument(
+            "-c",
+            "--config",
+            default=str(DEFAULT_JSON_CONFIG_PATH),
+            type=str,
+            metavar="CONFIG",  # a name for usage messages
+            help="",
+        )
 
-    The logger's level is set to DEBUG, and it
-    is linked to a handler that writes to the
-    console and whose level is
+        # parse the input arguments
+        args = parser.parse_args(argv_)
 
-    Returns
-    -------
-    logging.Logger
-        a logger object
-    """
-    # define handler that writes to stdout
-    console_handler = logging.StreamHandler(sys.stdout)
-    console_format = logging.Formatter("%(name)s %(levelname)s: %(message)s")
-    console_handler.setFormatter(console_format)
+        # print error if required arguments not provided
+        if not args.config:
+            logger.error("Paths to input config not provided.")
+            parser.print_help()
+
+        return args
 
-    # define logger and link to handler
-    logger = logging.getLogger(
-        __name__
-    )  # if imported as a module, the logger is named after the module
-    logger.setLevel(logging.DEBUG)
-    logger.addHandler(console_handler)
-    return logger
+    def setup_logger() -> logging.Logger:
+        """Setup a logger for this script
 
+        The logger's level is set to DEBUG, and it
+        is linked to a handler that writes to the
+        console (stdout).
 
-def run_workflow_from_cellfinder_run(config: CellfinderConfig):
+        Returns
+        -------
+        logging.Logger
+            a logger object
+        """
+        # define handler that writes to stdout
+        console_handler = logging.StreamHandler(sys.stdout)
+        console_format = logging.Formatter(
+            "%(name)s %(levelname)s: %(message)s"
+        )
+        console_handler.setFormatter(console_format)
+
+        # define logger and link to handler
+        logger = logging.getLogger(
+            __name__
+        )  # if imported as a module, the logger is named after the module
+        logger.setLevel(logging.DEBUG)
+        logger.addHandler(console_handler)
+        return logger
+
+    def setup_workflow(input_config_path: Path) -> CellfinderConfig:
+        """Run setup steps prior to executing the workflow
+
+        These setup steps include:
+        - instantiating a CellfinderConfig object with the required parameters,
+        - checking if the input data exists locally, and fetching from
+        GIN repository otherwise,
+        - adding the path to the input data files to the config, and
+        - creating a timestamped directory for the output of the workflow if
+        it doesn't exist and adding its path to the config
+
+        Parameters
+        ----------
+        input_config_path : Path
+            path to the input config file
+
+        Returns
+        -------
+        config : CellfinderConfig
+            a dataclass whose attributes are the parameters
+            for running cellfinder.
+        """
+
+        # Check config file exists
+        assert input_config_path.exists()
+
+        # Instantiate a CellfinderConfig from the input json file
+        # (assumes config is json serializable)
+        with open(input_config_path) as cfg:
+            config_dict = json.load(cfg)
+        config = CellfinderConfig(**config_dict)
+
+        # Print info logs for status
+        logger.info(f"Input config read from {input_config_path}")
+        if input_config_path == DEFAULT_JSON_CONFIG_PATH:
+            logger.info("Using default config file")
+
+        # Retrieve and add lists of input data to the config,
+        # if these are not defined yet
+        if not (config.list_signal_files and config.list_signal_files):
+            # build fullpaths to inputs
+            config.signal_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.signal_subdir
+            )
+            config.background_dir_path = str(
+                Path(config.install_path)
+                / config.extract_dir_relative
+                / config.background_subdir
+            )
+            # retrieve data
+            config = retrieve_input_data(config)
+
+        # Create timestamped output directory if it doesn't exist
+        timestamp = datetime.datetime.now()
+        timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
+        output_path_timestamped = Path(config.install_path) / (
+            str(config.output_path_basename_relative) + timestamp_formatted
+        )
+        output_path_timestamped.mkdir(parents=True, exist_ok=True)
+
+        # Add output path and output file path to config
+        config.output_path = output_path_timestamped
+        config.detected_cells_path = (
+            config.output_path / config.detected_cells_filename
+        )
+
+        return config
+
+    def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
+        """
+        Adds the lists of input data files (signal and background)
+        to the config.
+
+        It first checks if the input data exists locally.
+        - If both directories (signal and background) exist, the lists of
+        signal and background files are added to the config.
+        - If exactly one of the input data directories is missing, an error
+        message is logged.
+        - If neither of them exist, the data is retrieved from the provided GIN
+        repository. If no URL or hash to GIN is provided, an error is shown.
+
+        Parameters
+        ----------
+        config : CellfinderConfig
+            a dataclass whose attributes are the parameters
+            for running cellfinder.
+
+        Returns
+        -------
+        config : CellfinderConfig
+            a dataclass whose attributes are the parameters
+            for running cellfinder.
+        """
+        # Check if input data (signal and background) exist locally.
+        # If both directories exist, get list of signal and background files
+        if (
+            Path(config.signal_dir_path).exists()
+            and Path(config.background_dir_path).exists()
+        ):
+            logger.info("Fetching input data from the local directories")
+
+            config.list_signal_files = [
+                f
+                for f in Path(config.signal_dir_path).resolve().iterdir()
+                if f.is_file()
+            ]
+            config.list_background_files = [
+                f
+                for f in Path(config.background_dir_path).resolve().iterdir()
+                if f.is_file()
+            ]
+
+        # If exactly one of the input data directories is missing, print error
+        elif (
+            Path(config.signal_dir_path).resolve().exists()
+            or Path(config.background_dir_path).resolve().exists()
+        ):
+            if not Path(config.signal_dir_path).resolve().exists():
+                logger.error(
+                    f"The directory {config.signal_dir_path} does not exist"
+                )
+            else:
+                logger.error(
+                    f"The directory {config.background_dir_path} "
+                    "does not exist"
+                )
+
+        # If neither of them exist, retrieve data from GIN repository
+        else:
+            # check if GIN URL and hash are defined (log error otherwise)
+            if (not config.data_url) or (not config.data_hash):
+                logger.error(
+                    "Input data not found locally, and URL/hash to "
+                    "GIN repository not provided"
+                )
+
+            else:
+                # get list of files in GIN archive with pooch.retrieve
+                list_files_archive = pooch.retrieve(
+                    url=config.data_url,
+                    known_hash=config.data_hash,
+                    path=config.install_path,  # zip will be downloaded here
+                    progressbar=True,
+                    processor=pooch.Unzip(
+                        extract_dir=config.extract_dir_relative
+                        # path to unzipped dir,
+                        # *relative* to the path set in 'path'
+                    ),
+                )
+                logger.info(
+                    "Fetching input data from the provided GIN repository"
+                )
+
+                # Check signal and background parent directories exist now
+                assert Path(config.signal_dir_path).resolve().exists()
+                assert Path(config.background_dir_path).resolve().exists()
+
+                # Add signal files to config
+                config.list_signal_files = [
+                    f
+                    for f in list_files_archive
+                    if f.startswith(
+                        str(Path(config.signal_dir_path).resolve())
+                    )  # if str(config.signal_dir_path) in f
+                ]
+
+                # Add background files to config
+                config.list_background_files = [
+                    f
+                    for f in list_files_archive
+                    if f.startswith(
+                        str(Path(config.background_dir_path).resolve())
+                    )  # if str(config.background_dir_path) in f
+                ]
+
+        return config
+
+    # parse command line input arguments:
+    # sys.argv in most cases except for testing
+    # see https://paiml.com/docs/home/books/testing-in-python/chapter08-monkeypatching/#the-simplest-monkeypatching
+    argv = argv or sys.argv[1:]
+    args = parse_cli_arguments(argv)
+
+    # setup logger
+    logger = setup_logger()
+
+    # run setup steps and return config
+    cfg = setup_workflow(Path(args.config))
+
+    return cfg
+
+
+def run_workflow_from_cellfinder_run(cfg: CellfinderConfig):
     """
     Run workflow based on the cellfinder_core.main.main()
     function.
@@ -131,260 +370,35 @@ def run_workflow_from_cellfinder_run(config: CellfinderConfig):
     1. Read the input signal and background data as two separate
        Dask arrays.
     2. Run the main cellfinder pipeline on the input Dask arrays,
-       with the parameters defined in the input configuration (config).
+       with the parameters defined in the input configuration (cfg).
     3. Save the detected cells as an xml file to the location specified in
-       the input configuration (config).
+       the input configuration (cfg).
 
     Parameters
     ----------
-    config : CellfinderConfig
+    cfg : CellfinderConfig
         a class with the required setup methods and parameters for
         the cellfinder workflow
     """
     # Read input data as Dask arrays
-    signal_array = read_with_dask(config.signal_dir_path)
-    background_array = read_with_dask(config.background_dir_path)
+    signal_array = read_with_dask(cfg.signal_dir_path)
+    background_array = read_with_dask(cfg.background_dir_path)
 
     # Run main analysis using `cellfinder_run`
     detected_cells = cellfinder_run(
-        signal_array, background_array, config.voxel_sizes
+        signal_array, background_array, cfg.voxel_sizes
     )
 
     # Save results to xml file
     save_cells(
         detected_cells,
-        config.detected_cells_path,
-    )
-
-
-def setup_workflow(input_config_path: Path) -> CellfinderConfig:
-    """Run setup steps prior to executing the workflow
-
-    These setup steps include:
-    - instantiating a CellfinderConfig object with the required parameters,
-    - checking if the input data exists locally, and fetching from
-      GIN repository otherwise,
-    - adding the path to the input data files to the config, and
-    - creating a timestamped directory for the output of the workflow if
-      it doesn't exist and adding its path to the config
-
-    Parameters
-    ----------
-    input_config_path : Path
-        path to the input config file
-
-    Returns
-    -------
-    config : CellfinderConfig
-        a dataclass whose attributes are the parameters
-        for running cellfinder.
-    """
-
-    # Check config file exists
-    assert input_config_path.exists()
-
-    # Instantiate a CellfinderConfig from the input json file
-    # (assumes config is json serializable)
-    with open(input_config_path) as c:
-        config_dict = json.load(c)
-    config = CellfinderConfig(**config_dict)
-
-    # Print info logs for status
-    logger.info(f"Input config read from {input_config_path}")
-    if input_config_path == DEFAULT_JSON_CONFIG_PATH:
-        logger.info("Using default config file")
-
-    # Retrieve and add lists of input data to the config,
-    # if these are defined yet
-    if not (config.list_signal_files and config.list_background_files):
-        # build fullpaths to inputs
-        config.signal_dir_path = str(
-            Path(config.install_path)
-            / config.extract_dir_relative
-            / config.signal_subdir
-        )
-        config.background_dir_path = str(
-            Path(config.install_path)
-            / config.extract_dir_relative
-            / config.background_subdir
-        )
-        # retrieve data
-        config = retrieve_input_data(config)
-
-    # Create timestamped output directory if it doesn't exist
-    timestamp = datetime.datetime.now()
-    timestamp_formatted = timestamp.strftime("%Y%m%d_%H%M%S")
-    output_path_timestamped = Path(config.install_path) / (
-        str(config.output_path_basename_relative) + timestamp_formatted
-    )
-    output_path_timestamped.mkdir(parents=True, exist_ok=True)
-
-    # Add output path and output file path to config
-    config.output_path = output_path_timestamped
-    config.detected_cells_path = (
-        config.output_path / config.detected_cells_filename
+        cfg.detected_cells_path,
     )
 
-    return config
-
-
-def retrieve_input_data(config: CellfinderConfig) -> CellfinderConfig:
-    """
-    Adds the lists of input data files (signal and background) to the config.
-
-    It first checks if the input data exists locally.
-    - If both directories (signal and background) exist, the lists of signal
-      and background files are added to the config.
-    - If exactly one of the input data directories is missing, an error
-      message is logged.
-    - If neither of them exist, the data is retrieved from the provided GIN
-      repository. If no URL or hash to GIN is provided, an error is shown.
-
-    Parameters
-    ----------
-    config : CellfinderConfig
-        a dataclass whose attributes are the parameters
-        for running cellfinder.
-
-    Returns
-    -------
-    config : CellfinderConfig
-        a dataclass whose attributes are the parameters
-        for running cellfinder.
-    """
-    # Check if input data (signal and background) exist locally.
-    # If both directories exist, get list of signal and background files
-    if (
-        Path(config.signal_dir_path).exists()
-        and Path(config.background_dir_path).exists()
-    ):
-        logger.info("Fetching input data from the local directories")
-
-        config.list_signal_files = [
-            f
-            for f in Path(config.signal_dir_path).resolve().iterdir()
-            if f.is_file()
-        ]
-        config.list_background_files = [
-            f
-            for f in Path(config.background_dir_path).resolve().iterdir()
-            if f.is_file()
-        ]
-
-    # If exactly one of the input data directories is missing, print error
-    elif (
-        Path(config.signal_dir_path).resolve().exists()
-        or Path(config.background_dir_path).resolve().exists()
-    ):
-        if not Path(config.signal_dir_path).resolve().exists():
-            logger.error(
-                f"The directory {config.signal_dir_path} does not exist"
-            )
-        else:
-            logger.error(
-                f"The directory {config.background_dir_path} does not exist"
-            )
-
-    # If neither of them exist, retrieve data from GIN repository
-    else:
-        # check if GIN URL and hash are defined (log error otherwise)
-        if (not config.data_url) or (not config.data_hash):
-            logger.error(
-                "Input data not found locally, and URL/hash to "
-                "GIN repository not provided"
-            )
-
-        else:
-            # get list of files in GIN archive with pooch.retrieve
-            list_files_archive = pooch.retrieve(
-                url=config.data_url,
-                known_hash=config.data_hash,
-                path=config.install_path,  # zip will be downloaded here
-                progressbar=True,
-                processor=pooch.Unzip(
-                    extract_dir=config.extract_dir_relative
-                    # path to unzipped dir,
-                    # *relative* to the path set in 'path'
-                ),
-            )
-            logger.info("Fetching input data from the provided GIN repository")
-
-            # Check signal and background parent directories exist now
-            assert Path(config.signal_dir_path).resolve().exists()
-            assert Path(config.background_dir_path).resolve().exists()
-
-            # Add signal files to config
-            config.list_signal_files = [
-                f
-                for f in list_files_archive
-                if f.startswith(
-                    str(Path(config.signal_dir_path).resolve())
-                )  # if str(config.signal_dir_path) in f
-            ]
-
-            # Add background files to config
-            config.list_background_files = [
-                f
-                for f in list_files_archive
-                if f.startswith(
-                    str(Path(config.background_dir_path).resolve())
-                )  # if str(config.background_dir_path) in f
-            ]
-
-    return config
-
-
-def parse_cli_arguments() -> argparse.Namespace:
-    """Define argument parser for cellfinder
-    workflow script.
-
-    It expects a path to a json file with the
-    parameters required to run the workflow.
-    If none is provided, the default
-
-    Returns
-    -------
-    args : argparse.Namespace
-        command line input arguments parsed
-    """
-    # initialise argument parser
-    parser = argparse.ArgumentParser(
-        description=(
-            "To launch the workflow with "
-            "a desired set of input parameters, run:"
-            " `python cellfinder_main.py --config path/to/input/config.json` "
-            "where path/to/input/config.json is the json file "
-            "containing the workflow parameters."
-        )
-    )
-    # add arguments
-    parser.add_argument(
-        "-c",
-        "--config",
-        default=str(DEFAULT_JSON_CONFIG_PATH),
-        type=str,
-        metavar="CONFIG",  # a name for usage messages
-        help="",
-    )
-
-    # build parser object
-    args = parser.parse_args()
-
-    # print error if required arguments not provided
-    if not args.config:
-        logger.error("Paths to input config not provided.")
-        parser.print_help()
-
-    return args
-
 
 if __name__ == "__main__":
-    # setup logger
-    logger = setup_logger()
-
-    # parse command line arguments
-    args = parse_cli_arguments()
+    # run setup
+    cfg = setup()
 
     # run workflow
-    config = setup_workflow(Path(args.config))
-    run_workflow_from_cellfinder_run(config)  # only this will be benchmarked
+    run_workflow_from_cellfinder_run(cfg)  # only this will be benchmarked