diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 589d7118..14a733bc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,19 +35,9 @@ jobs: with: version: "${{ matrix.NXF_VER }}" - - name: Download the NCBI taxdump database - run: | - mkdir ncbi_taxdump - curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - - - - name: Download the BUSCO lineage database - run: | - mkdir busco_database - curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - - - name: Run pipeline with test data # You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --taxdump $PWD/ncbi_taxdump --busco $PWD/busco_database --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b07486f..c5d7c7b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [[0.7.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.7.0)] – Psyduck – [2024-10-02] +## [[0.7.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.7.0)] – Psyduck – [2024-11-20] The pipeline is now considered to be a complete and suitable replacement for the Snakemake version. @@ -13,6 +13,7 @@ The pipeline is now considered to be a complete and suitable replacement for the to indicate in the samplesheet whether the reads are paired or single. - Updated the Blastn settings to allow 7 days runtime at most, since that covers 99.7% of the jobs. 
+- Allow database inputs to be optionally compressed (`.tar.gz`) ### Software dependencies diff --git a/assets/test/mMelMel3.1.buscogenes.dmnd b/assets/test/mMelMel3.1.buscogenes.dmnd deleted file mode 100644 index 391345ba..00000000 Binary files a/assets/test/mMelMel3.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test/mMelMel3.1.buscoregions.dmnd b/assets/test/mMelMel3.1.buscoregions.dmnd deleted file mode 100644 index 91fa6042..00000000 Binary files a/assets/test/mMelMel3.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb deleted file mode 100644 index 18062436..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr deleted file mode 100644 index 0b5d4906..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin deleted file mode 100644 index bebd568b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog deleted file mode 100644 index e6ef79c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos deleted file mode 100644 index 99700566..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not deleted file mode 100644 index 047e8d38..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq 
b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq deleted file mode 100644 index 48497573..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf deleted file mode 100644 index 3be5ea5b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto deleted file mode 100644 index 6d4a41c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 b/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 deleted file mode 100644 index dc933c1f..00000000 Binary files a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd b/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd deleted file mode 100644 index a0d0e1d2..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd b/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd deleted file mode 100644 index 3f2a1a54..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb deleted file mode 100644 index 0905629a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr deleted file mode 100644 index 1fa3521a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin 
b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin deleted file mode 100644 index 0503c4c7..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog deleted file mode 100644 index 7dcd60eb..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos deleted file mode 100644 index 6bd1dcdf..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not deleted file mode 100644 index 8bacddec..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq deleted file mode 100644 index 6afe38e9..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf deleted file mode 100644 index efd34086..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto deleted file mode 100644 index 4b140ec3..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 b/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 deleted file mode 100644 index 2a56a82f..00000000 Binary files 
a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/conf/test.config b/conf/test.config index 1801bc09..20331442 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,11 +30,11 @@ params { taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/conf/test_full.config b/conf/test_full.config index ca78130e..a86e0050 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -25,11 +25,11 @@ params { taxon = "Laetiporus sulphureus" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" busco = "/lustre/scratch123/tol/resources/busco/latest" - blastp = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test_full/nt_gfLaeSulp1.1" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscogenes.dmnd.tar.gz" 
+ blastx = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/nt_gfLaeSulp1.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/conf/test_raw.config b/conf/test_raw.config index 0cf1d16f..7af9bd2e 100644 --- a/conf/test_raw.config +++ b/conf/test_raw.config @@ -31,11 +31,11 @@ params { taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1/" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/docs/usage.md b/docs/usage.md index d2ed32b1..6f8909bf 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -78,6 +78,9 @@ The BlobToolKit pipeline can be run in many different ways. The default way requ It is a good idea to put a date suffix for each database location so you know at a glance whether you are using the latest version. We are using the `YYYY_MM` format as we do not expect the databases to be updated more frequently than once a month. 
However, feel free to use `DATE=YYYY_MM_DD` or a different format if you prefer. +Note that all input databases may be optionally passed directly to the pipeline compressed as `.tar.gz`, and the pipeline will handle decompression. +The instructions below show how to build each input database in _two_ forms: decompressed _and_ compressed. You may not need to do both. Select the one that is most appropriate for how you want to use the pipeline. + #### 1. NCBI taxdump database Create the database directory, retrieve and decompress the NCBI taxonomy: @@ -85,8 +88,10 @@ Create the database directory, retrieve and decompress the NCBI taxonomy: ```bash DATE=2024_10 TAXDUMP=/path/to/databases/taxdump_${DATE} +TAXDUMP_TAR=/path/to/databases/taxdump_${DATE}.tar.gz mkdir -p "$TAXDUMP" -curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -xzf - -C "$TAXDUMP" +curl -L https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz -o $TAXDUMP_TAR +tar -xzf $TAXDUMP_TAR -C "$TAXDUMP" ``` #### 2. NCBI nucleotide BLAST database @@ -96,6 +101,7 @@ Create the database directory and move into the directory: ```bash DATE=2024_10 NT=/path/to/databases/nt_${DATE} +NT_TAR=/path/to/databases/nt_${DATE}.tar.gz mkdir -p $NT cd $NT ``` @@ -113,6 +119,11 @@ done wget "https://ftp.ncbi.nlm.nih.gov/blast/db/v5/taxdb.tar.gz" && tar xf taxdb.tar.gz -C $NT && rm taxdb.tar.gz + +# Compress and cleanup +cd .. +tar -cvzf $NT_TAR $(basename $NT) +rm -r $NT ``` #### 3. UniProt reference proteomes database @@ -126,6 +137,7 @@ Create the database directory and move into the directory: ```bash DATE=2024_10 UNIPROT=/path/to/databases/uniprot_${DATE} +UNIPROT_TAR=/path/to/databases/uniprot_${DATE}.tar.gz mkdir -p $UNIPROT cd $UNIPROT ``` @@ -152,6 +164,12 @@ diamond makedb -p 16 --in reference_proteomes.fasta.gz --taxonmap reference_prot # clean up mv extract/{README,STATS} . rm -r extract +rm -r $TAXDUMP + +# Compress final database and cleanup +cd .. 
+tar -cvzf $UNIPROT_TAR $(basename $UNIPROT) +rm -r $UNIPROT ``` #### 4. BUSCO databases @@ -161,6 +179,7 @@ Create the database directory and move into the directory: ```bash DATE=2024_10 BUSCO=/path/to/databases/busco_${DATE} +BUSCO_TAR=/path/to/databases/busco_${DATE}.tar.gz mkdir -p $BUSCO cd $BUSCO ``` @@ -181,6 +200,13 @@ If you have [GNU parallel](https://www.gnu.org/software/parallel/) installed, yo find v5/data -name "*.tar.gz" | parallel "cd {//}; tar -xzf {/}" ``` +Finally re-compress and cleanup the files: + +```bash +tar -cvzf $BUSCO_TAR -C "$(dirname $BUSCO)" "$(basename $BUSCO)" +cd .. && rm -r $BUSCO +``` + ## Changes from Snakemake to Nextflow ### Commands diff --git a/modules.json b/modules.json index 4af0bcd6..23b5b5d2 100644 --- a/modules.json +++ b/modules.json @@ -87,6 +87,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "windowmasker/mkcounts": { "branch": "master", "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", diff --git a/modules/local/generate_config.nf b/modules/local/generate_config.nf index 2992ab6d..0d121fca 100644 --- a/modules/local/generate_config.nf +++ b/modules/local/generate_config.nf @@ -10,13 +10,11 @@ process GENERATE_CONFIG { val taxon_query val busco_lin path lineage_tax_ids - tuple val(meta2), path(blastn) val reads - // The following are passed as "val" because we just want to know the full paths. 
No staging necessary - val blastp_path - val blastx_path - val blastn_path - val taxdump_path + tuple val(meta2), path(blastp) + tuple val(meta3), path(blastx) + tuple val(meta4), path(blastn) + tuple val(meta5), path(taxdump) output: tuple val(meta), path("*.yaml") , emit: yaml @@ -43,10 +41,10 @@ process GENERATE_CONFIG { $accession_params \\ --nt $blastn \\ $input_reads \\ - --blastp ${blastp_path} \\ - --blastx ${blastx_path} \\ - --blastn ${blastn_path} \\ - --taxdump ${taxdump_path} \\ + --blastp ${blastp} \\ + --blastx ${blastx} \\ + --blastn ${blastn} \\ + --taxdump ${taxdump} \\ --output_prefix ${prefix} cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..9bd8f554 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..290346b3 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. 
+ documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - untar: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..c957517a --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..ceb91b79 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] 
+ ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/nextflow_schema.json b/nextflow_schema.json index e722369d..3c75ab58 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -52,7 +52,8 @@ "type": "string", "enum": ["png", "svg"], "description": "Select the format of the output images.", - "fa_icon": "fas fa-image" + "fa_icon": "fas fa-image", + "default": "png" }, "outdir": { "type": "string", @@ -109,40 +110,40 @@ "properties": { "busco": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Local directory where clade-specific BUSCO lineage datasets are 
stored", "fa_icon": "fas fa-folder-open" }, "lineage_tax_ids": { "type": "string", - "format": "file-path", + "format": "path", "description": "Local file that holds a mapping between BUSCO lineages and taxon IDs.", "help_text": "Initialised from https://busco-data.ezlab.org/v5/data/placement_files/mapping_taxids-busco_dataset_name.*.2019-12-16.txt.tar.gz", "fa_icon": "fas fa-file-code" }, "blastp": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscogenes database", "fa_icon": "fas fa-file-archive" }, "blastx": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscoregions database", "fa_icon": "fas fa-file-archive" }, "blastn": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Path to the nucleotide BLAST database", "fa_icon": "fas fa-file-archive" }, "taxdump": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Path to the new NCBI tax dump database", "fa_icon": "fas fa-folder-open" } diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index 4b07723e..2e1a442d 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -47,12 +47,11 @@ workflow BUSCO_DIAMOND { ch_fasta_with_lineage, "genome", ch_fasta_with_lineage.map { it[0].lineage_name }, - busco_db, + busco_db.first(), [], ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) - // // Tidy up the BUSCO output directories before publication // diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 69a4757a..223c01b7 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -2,6 +2,7 @@ // Check input samplesheet 
and get aligned read channels // +include { UNTAR } from '../../modules/nf-core/untar/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' @@ -15,11 +16,56 @@ workflow INPUT_CHECK { taxon // channel: val(taxon) busco_lin // channel: val([busco_lin]) lineage_tax_ids // channel: /path/to/lineage_tax_ids - blastn // channel: [ val(meta), path(blastn_db) ] + blastn // channel: [ path(blastn_db) ] + blastp // channel: [ path(blastp_db) ] + blastx // channel: [ path(blastx_db) ] + busco_db // channel: [ path(busco_db) ] + taxdump // channel: [ path(taxdump) ] main: ch_versions = Channel.empty() + // + // SUBWORKFLOW: Decompress databases if needed + // + + // Join into single databases channel + databases = blastn.concat(blastp, blastx, busco_db, taxdump) + + // Check which need to be decompressed + ch_dbs_for_untar = databases + .branch { db_meta, db_path -> + untar: db_path.name.endsWith( ".tar.gz" ) + skip: true + } + + // Untar the databases + UNTAR ( ch_dbs_for_untar.untar ) + ch_versions = ch_versions.mix( UNTAR.out.versions.first() ) + + // Join and format dbs + // NOTE: The conditional for blastp/x is needed because nf-core/untar puts the database in a directory + ch_databases = UNTAR.out.untar.concat( ch_dbs_for_untar.skip ) + .map { meta, db -> [ meta + [id: db.baseName], db] } + .map { db_meta, db_path -> + if (db_meta.type in ["blastp", "blastx"]) { + [db_meta, file(db_path.toString() + "/${db_path.name}", checkIfExists: true)] + } else { + [db_meta, db_path] + } + } + .branch { db_meta, db_path -> + blastn: db_meta.type == "blastn" + blastp: db_meta.type == "blastp" + blastx: db_meta.type == "blastx" + busco: db_meta.type == "busco" + taxdump: db_meta.type == "taxdump" + } + + + // + // SUBWORKFLOW: Process samplesheet + // if ( params.fetchngs_samplesheet ) { FETCHNGSSAMPLESHEET_CHECK ( 
samplesheet ) .csv @@ -66,12 +112,11 @@ workflow INPUT_CHECK { taxon, busco_lin, lineage_tax_ids, - blastn, reads.collect(flat: false).ifEmpty([]), - params.blastp, - params.blastx, - params.blastn, - params.taxdump, + ch_databases.blastp, + ch_databases.blastx, + ch_databases.blastn, + ch_databases.taxdump, ) ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first()) @@ -115,6 +160,11 @@ workflow INPUT_CHECK { categories_tsv = GENERATE_CONFIG.out.categories_tsv // channel: [ val(meta), path(tsv) ] taxon_id = ch_taxon_id // channel: val(taxon_id) busco_lineages = ch_busco_lineages // channel: val([busco_lin]) + blastn = ch_databases.blastn // channel: [ val(meta), path(blastn_db) ] + blastp = ch_databases.blastp // channel: [ val(meta), path(blastp_db) ] + blastx = ch_databases.blastx // channel: [ val(meta), path(blastx_db) ] + busco_db = ch_databases.busco.map { _, db_path -> db_path } // channel: [ path(busco_db) ] + taxdump = ch_databases.taxdump.map { _, db_path -> db_path } // channel: [ path(taxdump) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index 280278a7..d9effaa4 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -24,17 +24,21 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.fasta) { ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ]) } else { exit 1, 'Genome fasta file must be specified!' } if (params.taxon) { ch_taxon = Channel.value(params.taxon) } else { exit 1, 'NCBI Taxon ID not specified!' } -if (params.blastp) { ch_blastp = Channel.value([ [ 'id': file(params.blastp).baseName ], params.blastp ]) } else { exit 1, 'Diamond BLASTp database must be specified!' 
} -if (params.blastx) { ch_blastx = Channel.value([ [ 'id': file(params.blastx).baseName ], params.blastx ]) } else { exit 1, 'Diamond BLASTx database must be specified!' } -if (params.blastn) { ch_blastn = Channel.value([ [ 'id': file(params.blastn).baseName ], params.blastn ]) } else { exit 1, 'BLASTn database not specified!' } -if (params.taxdump) { ch_taxdump = file(params.taxdump) } else { exit 1, 'NCBI Taxonomy database not specified!' } +if (params.blastp) { ch_blastp = Channel.fromPath(params.blastp).map { tuple(["type": "blastp"], it) } } else { exit 1, 'Diamond BLASTp database must be specified!' } +if (params.blastx) { ch_blastx = Channel.fromPath(params.blastx).map { tuple(["type": "blastx"], it) } } else { exit 1, 'Diamond BLASTx database must be specified!' } +if (params.blastn) { ch_blastn = Channel.fromPath(params.blastn).map { tuple(["type": "blastn"], it) } } else { exit 1, 'BLASTn database not specified!' } +if (params.taxdump) { ch_taxdump = Channel.fromPath(params.taxdump).map { tuple(["type": "taxdump"], it) } } else { exit 1, 'NCBI Taxonomy database not specified!' } if (params.fetchngs_samplesheet && !params.align) { exit 1, '--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!' 
} if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids).first() } else { exit 1, 'Mapping BUSCO lineage <-> taxon_ids not specified' } // Create channel for optional parameters if (params.busco_lineages) { ch_busco_lin = Channel.value(params.busco_lineages) } else { ch_busco_lin = Channel.value([]) } -if (params.busco) { ch_busco_db = Channel.fromPath(params.busco).first() } else { ch_busco_db = Channel.value([]) } +if (params.busco) { + ch_busco_db = Channel.fromPath(params.busco).first().map { tuple([ "type": "busco"], it ) } +} else { + ch_busco_db = Channel.value([]) +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -109,6 +113,10 @@ workflow BLOBTOOLKIT { ch_busco_lin, ch_lineage_tax_ids, ch_blastn, + ch_blastx, + ch_blastp, + ch_busco_db, + ch_taxdump, ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) @@ -135,8 +143,8 @@ workflow BLOBTOOLKIT { BUSCO_DIAMOND ( PREPARE_GENOME.out.genome, INPUT_CHECK.out.busco_lineages, - ch_busco_db, - ch_blastp, + INPUT_CHECK.out.busco_db, + INPUT_CHECK.out.blastp, INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions ) @@ -147,7 +155,7 @@ workflow BLOBTOOLKIT { RUN_BLASTX ( PREPARE_GENOME.out.genome, BUSCO_DIAMOND.out.first_table, - ch_blastx, + INPUT_CHECK.out.blastx, INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( RUN_BLASTX.out.versions ) @@ -159,7 +167,7 @@ workflow BLOBTOOLKIT { RUN_BLASTN ( RUN_BLASTX.out.blastx_out, PREPARE_GENOME.out.genome, - ch_blastn, + INPUT_CHECK.out.blastn, INPUT_CHECK.out.taxon_id, ) @@ -187,7 +195,7 @@ workflow BLOBTOOLKIT { BUSCO_DIAMOND.out.blastp_txt.ifEmpty([[],[]]), RUN_BLASTX.out.blastx_out.ifEmpty([[],[]]), RUN_BLASTN.out.blastn_out.ifEmpty([[],[]]), - ch_taxdump + INPUT_CHECK.out.taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLS.out.versions )