diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7c0b09..2bc5487 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,8 @@ on: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -24,9 +26,15 @@ jobs: strategy: matrix: NXF_VER: - - "23.04.0" + - "22.10.1" - "latest-everything" steps: + - name: Get branch names + # Pulls the names of current branches in repo + # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to + id: branch-names + uses: tj-actions/branch-names@v8 + - name: Check out pipeline code uses: actions/checkout@v3 @@ -35,10 +43,34 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Set up Singularity + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Setup apptainer + uses: eWaterCycle/setup-apptainer@main + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install nf-core + run: | + pip install nf-core + + - name: NF-Core Download - download singularity containers + # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found + # Must occur after singularity install or will crash trying to dl containers + # Zip up this fresh download and run the checked out version + run: | + nf-core download sanger-tol/ascc --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-ascc --container-cache-utilisation amend --container-system singularity + - name: Download test data # Download A fungal test data set that is full enough to show some real output. 
run: | - curl https://tolit.cog.sanger.ac.uk/test-data/resources/ascc/asccTinyTest.tar.gz | tar xzf - + curl https://tolit.cog.sanger.ac.uk/test-data/resources/ascc/asccTinyTest_V2.tar.gz | tar xzf - - name: Download the NCBI taxdump database run: | @@ -48,11 +80,11 @@ jobs: - name: Download the FCS-gx database run: | mkdir FCS_gx - wget -c https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.taxa.tsv -O FCS_gx/all.taxa.tsv - wget -c https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.gxi -O FCS_gx/all.gxi - wget -c https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.gxs -O FCS_gx/all.gxs - wget -c https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.meta.jsonl -O FCS_gx/all.meta.jsonl - wget -c https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.blast_div.tsv.gz -O FCS_gx/all.blast_div.tsv.gz + wget -cq https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.taxa.tsv -O FCS_gx/all.taxa.tsv + wget -cq https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.gxi -O FCS_gx/all.gxi + wget -cq https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.gxs -O FCS_gx/all.gxs + wget -cq https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.meta.jsonl -O FCS_gx/all.meta.jsonl + wget -cq https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/database/test-only/test-only.blast_div.tsv.gz -O FCS_gx/all.blast_div.tsv.gz - name: Download the BUSCO lineage database run: | @@ -72,7 +104,12 @@ jobs: - name: Download the pacbio barcode run: | mkdir pacbio_barcode - wget -O pacbio_barcode/SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip -c https://www.pacb.com/wp-content/uploads/SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip && cd pacbio_barcode && unzip SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip && mv SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta pacbio_adaptors.fa && rm -rf SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip __MACOSX && cd .. 
+ wget -O pacbio_barcode/SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip -c https://www.pacb.com/wp-content/uploads/SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip + cd pacbio_barcode + unzip SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip + mv SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta pacbio_adaptors.fa + rm -rf SMRTbell_Barcoded_Adapter_Plate_3.0_bc2001-bc2096.fasta_.zip __MACOSX + cd ../ - name: Download the subset of Diamond database run: | @@ -84,9 +121,9 @@ jobs: mkdir vecscreen curl -L https://ftp.ncbi.nlm.nih.gov/blast/db/v4/16SMicrobial_v4.tar.gz | tar -C vecscreen -xzf - - - name: Run pipeline with test data + - name: Singularity - Run FULL pipeline with test data # TODO nf-core: You can customise CI pipeline run tests as required # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,singularity --outdir ./results --steps ALL diff --git a/.nf-core.yml b/.nf-core.yml index f98122b..d142279 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,20 +1,18 @@ repository_type: pipeline lint: - files_exist: - - CODE_OF_CONDUCT.md - - assets/nf-core-ascc_logo_light.png - - docs/images/nf-core-ascc_logo_light.png - - docs/images/nf-core-ascc_logo_dark.png - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml - - conf/igenomes.config + files_exist: false files_unchanged: - CODE_OF_CONDUCT.md - assets/nf-core-ascc_logo_light.png - docs/images/nf-core-ascc_logo_light.png - docs/images/nf-core-ascc_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/workflows/branch.yml + - .github/CONTRIBUTING.md + - .github/PULL_REQUEST_TEMPLATE.md + - .github/workflows/linting_comment.yml + - assets/email_template.html + - pyproject.toml - LICENSE - .github/workflows/linting.yml - lib/NfcoreTemplate.groovy diff --git a/README.md b/README.md index d1784ea..e4e9621 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) diff --git a/assets/github_testing/test.yaml b/assets/github_testing/test.yaml index c5a8bbf..fde2e2e 100755 --- a/assets/github_testing/test.yaml +++ b/assets/github_testing/test.yaml @@ -1,15 +1,17 @@ -assembly_path: /home/runner/work/ascc/ascc/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa -assembly_title: asccTinyTest -pacbio_barcodes: /home/runner/work/ascc/ascc/pacbio_barcode/pacbio_adaptors.fa -pacbio_multiplexing_barcode_names: "bc2008,bc2009" -reads_path: /home/runner/work/ascc/ascc/asccTinyTest/pacbio +assembly_path: 
/home/runner/work/ascc/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +assembly_title: asccTinyTest_V2 +reads_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/pacbio/ reads_type: "hifi" +pacbio_barcodes: /home/runner/work/ascc/ascc/pacbio_barcode/pacbio_adaptors.fa +pacbio_multiplexing_barcode_names: "bc2001,bc2009" sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 -mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa -plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa +mito_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa +plastid_fasta_path: /home/runner/work/ascc/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa kmer_len: 7 -## Below this point will need updating as more subworkflows are built +dimensionality_reduction_methods: "pca,random_trees" +# all available methods +# "pca,umap,t-sne,isomap,lle_standard,lle_hessian,lle_modified,mds,se,random_trees,kernel_pca,pca_svd,autoencoder_sigmoid,autoencoder_linear,autoencoder_selu,autoencoder_relu,nmf" nt_database: /home/runner/work/ascc/ascc/NT_database/ nt_database_prefix: 18S_fungal_sequences nt_kraken_db_path: /home/runner/work/ascc/ascc/kraken2/kraken2 @@ -20,7 +22,8 @@ busco_lineages_folder: /home/runner/work/ascc/ascc/busco_database/lineages fcs_gx_database_path: /home/runner/work/ascc/ascc/FCS_gx/ diamond_uniprot_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd diamond_nr_database_path: /home/runner/work/ascc/ascc/diamond/UP000000212_1234679_tax.dmnd -vecscreen_database_path: /home/runner/work/ascc/ascc/vecscreen +vecscreen_database_path: /home/runner/work/ascc/ascc/vecscreen/ seqkit: sliding: 6000 window: 100000 +n_neighbours: 13 diff --git a/assets/static-args.yaml b/assets/static-args.yaml new file mode 100755 index 0000000..6ae120e --- /dev/null +++ b/assets/static-args.yaml @@ -0,0 +1,3 @@ +kmer_size: 7 +n_neighbors_setting: 13 +autoencoder_epochs_count: -1 diff --git a/assets/test.yaml b/assets/test.yaml index e9eafa2..122db32 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,14 +1,17 @@ -assembly_path: /lustre/scratch123/tol/teams/tola/users/ea10/pipeline_testing/20231114_pyoelii_vecscreen/ref/PlasmoDB-58_Pyoeliiyoelii17XNL_Genome_with_adapters2_fh2.fasta -assembly_title: asccTinyTest -reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/ +assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/assembly/pyoelii_tiny_testfile_with_adapters.fa +assembly_title: asccTinyTest_V2 +reads_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/pacbio/ reads_type: "hifi" pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc2008,bc2009" sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 -mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa -plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa +mito_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_mitochondrion_ncbi.fa +plastid_fasta_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/asccTinyTest_V2/organellar/Pyoeliiyoelii17XNL_apicoplast_ncbi.fa kmer_len: 7 
+dimensionality_reduction_methods: "pca,random_trees" +# all available methods +# "pca,umap,t-sne,isomap,lle_standard,lle_hessian,lle_modified,mds,se,random_trees,kernel_pca,pca_svd,autoencoder_sigmoid,autoencoder_linear,autoencoder_selu,autoencoder_relu,nmf" nt_database: /data/blastdb/Supported/NT/202308/dbv4/ nt_database_prefix: nt nt_kraken_db_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/nt/nt @@ -17,9 +20,10 @@ ncbi_taxonomy_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdu ncbi_rankedlineage_path: /lustre/scratch123/tol/teams/tola/users/ea10/databases/taxdump/rankedlineage.dmp busco_lineages_folder: /lustre/scratch123/tol/resources/busco/data/v5/2021-08-27/lineages fcs_gx_database_path: /lustre/scratch124/tol/projects/asg/sub_projects/ncbi_decon/0.4.0/gxdb -vecscreen_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/vecscreen_database +vecscreen_database_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/vecscreen/ diamond_uniprot_database_path: /lustre/scratch123/tol/teams/tola/users/ea10/ascc_databases/uniprot/uniprot_reference_proteomes_with_taxonnames.dmnd diamond_nr_database_path: /lustre/scratch123/tol/resources/nr/latest/nr.dmnd seqkit: sliding: 100000 window: 6000 +n_neighbours: 13 diff --git a/bin/VSlistTo1HitPerLine.py b/bin/VSlistTo1HitPerLine.py index 87ea39d..42d45a3 100755 --- a/bin/VSlistTo1HitPerLine.py +++ b/bin/VSlistTo1HitPerLine.py @@ -5,8 +5,8 @@ This script converts the VecScreen text list output to one line giving the coordinates for each vector segment in the format: VecScreen_Category ID_string start_position end_position -The default is to report Strong, Moderate, and Weak matches and also segments of Suspect Origin. Reporting of any category can be suppressed by including ---skip_reporting_suspect_hits, --skip_reporting_weak_hits, --skip_reporting_moderate_hits or --skip_reporting_strong_hits on the command line. +The default is to report Strong, Moderate, and Weak matches and also segments of Suspect Origin. Reporting of any category can be suppressed by including +--skip_reporting_suspect_hits, --skip_reporting_weak_hits, --skip_reporting_moderate_hits or --skip_reporting_strong_hits on the command line. "No hits" will be reported for any Query sequence that had no matches in any of the selected categories, unless --skip_reporting_no_hits is included on the command line. VecScreen errors will be reported unless --skip_reporting_errors is included on the command line. 
Usage: diff --git a/bin/get_kmers_counts.py b/bin/get_kmers_counts.py new file mode 100755 index 0000000..808f578 --- /dev/null +++ b/bin/get_kmers_counts.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +""" +Script for counting kmer frequencies per sequence in a FASTA file +Output (STDOUT): kmer counts as a CSV table +Developed by Eerik Aunin (ea10@sanger.ac.uk) +""" + +import argparse +import general_purpose_functions as gpf +import kcounter +from collections import OrderedDict +import pandas as pd + + +def main(fasta_path, out_path, kmer_size): + fasta_data = gpf.read_fasta_in_chunks(fasta_path) + nucleotides_collection = list() + for header, seq in fasta_data: + seq = seq.upper() + seq_len = len(seq) + nucleotides_dict = kcounter.count_kmers(seq, kmer_size, canonical_kmers=True) + relative_counts_dict = OrderedDict() + relative_counts_dict["header"] = header + relative_counts_dict["seq_len"] = seq_len + for kmer in nucleotides_dict: + kmer_relative_count = nucleotides_dict[kmer] / seq_len + relative_counts_dict[kmer] = kmer_relative_count + nucleotides_collection.append(relative_counts_dict) + df = pd.DataFrame(nucleotides_collection) + df = df.fillna(0) + df.to_csv(out_path, index=False) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-v", "--version", action="version", version="1.0") + parser.add_argument("fasta_path", type=str, help="Path to input FASTA file") + parser.add_argument("out_path", type=str, help="Path for output CSV file") + parser.add_argument("--kmer_size", type=int, help="kmer size (bp). Default: 7", default=7) + args = parser.parse_args() + main(args.fasta_path, args.out_path, args.kmer_size) diff --git a/bin/kmer_count_dim_reduction.py b/bin/kmer_count_dim_reduction.py new file mode 100755 index 0000000..b72d5ba --- /dev/null +++ b/bin/kmer_count_dim_reduction.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python3 + +""" +Script for kmer count dimensionality reduction, using multiple methods. 
+ +Developed by Eerik Aunin (ea10@sanger.ac.uk) + +Modified by Damon-Lee Pointon (dp24) +""" + +# https://machinelearningmastery.com/dimensionality-reduction-algorithms-with-python/ +# https://scikit-learn.org/stable/auto_examples/manifold/plot_compare_methods.html#sphx-glr-auto-examples-manifold-plot-compare-methods-py + +import os + +matplot_config_path = os.getcwd() + "/matplotconfig/" +fonts_config_path = os.getcwd() + "/fontconfig/" +numba_config_path = os.getcwd() + "/numbaconfig/" + +for i in [fonts_config_path, matplot_config_path, numba_config_path]: + if not os.path.exists(i): + os.makedirs(i) + +os.environ["OSFONTDIR"] = fonts_config_path +os.environ["MPLCONFIGDIR"] = matplot_config_path +os.environ["NUMBA_CACHE_DIR"] = numba_config_path + +os.chmod(fonts_config_path, 0o0755) +os.chmod(matplot_config_path, 0o0755) +os.chmod(numba_config_path, 0o0755) + +import matplotlib + +matplotlib.use("Agg") +import matplotlib.pyplot as plt +import numpy as np + +from sklearn import preprocessing +import pandas as pd + +import umap +from sklearn import decomposition +from sklearn import manifold +from sklearn import ensemble + +import tensorflow as tf +import random + +import sys +import argparse +from pathlib import Path +from datetime import datetime +from functools import reduce + + +# plt.style.use("ggplot") + + +def reset_random_seeds(): + # https://stackoverflow.com/questions/32419510/how-to-get-reproducible-results-in-keras + os.environ["PYTHONHASHSEED"] = str(1) + tf.random.set_seed(1) + np.random.seed(1) + random.seed(1) + + +def print_timestamp(process_name): + sys.stderr.write("{}: running {}\n".format(datetime.now(), process_name)) + + +def embedding_to_dataframe(embedding, seq_names, plot_title): + """ + Function for taking a dimensionality reduction embedding and converting it into a pandas dataframe + """ + plot_title_underscores = plot_title.replace(" ", "_") + embedding_df = pd.DataFrame(embedding) + embedding_df.columns = ["embedding_x_" + plot_title_underscores, "embedding_y_" + plot_title_underscores] + embedding_df["scaff"] = seq_names + return embedding_df + + +def load_data(kmer_counts_file): + """ + Loads kmer counts from an input file, returns the counts as a pandas dataframe and sequence names as a list + """ + if os.path.isfile(kmer_counts_file) is False: + sys.stderr.write("kmer counts file ({}) was not found\n".format(kmer_counts_file)) + sys.exit(1) + if os.stat(kmer_counts_file).st_size == 0: + sys.stderr.write("kmer counts file ({}) is empty\n".format(kmer_counts_file)) + sys.exit(1) + data = pd.read_csv(kmer_counts_file, sep=",", index_col=0) + df_row_count = data.shape[0] + if df_row_count == 0: + sys.stderr.write("The kmer counts table ({}) has no rows\n".format(kmer_counts_file)) + sys.exit(1) + data_to_cluster = data.drop(["seq_len"], axis=1) + df = data_to_cluster + + seq_names = df.index.values.tolist() + scalar = preprocessing.MinMaxScaler() + scalar.fit(data_to_cluster) + df = scalar.transform(data_to_cluster) + return df, seq_names + + +def make_loss_plot(model_history, out_folder, activation_mode): + """ + Creates the loss vs epoch plot and saves it as a file + """ + plt.figure(figsize=(12, 6)) + plt.plot(model_history.history["loss"]) + plt.title("Autoencoder {}: Loss vs. 
Epoch".format(activation_mode)) + plt.ylabel("Loss") + plt.xlabel("Epoch") + plt.grid(True) + plt.savefig(out_folder + "/autoencoder_{}_loss_vs_epoch_plot.png".format(activation_mode)) + plt.close() + + +def run_autoencoder(df, nr_of_epochs, activation_mode, out_folder): + """ + Function for running autoencoder for dimensionality reduction of kmer counts + """ + input_dim = df.shape[1] + # This is the dimension of the latent space (encoding space) + latent_dim = 2 + + encoder = tf.keras.models.Sequential( + [ + tf.keras.layers.Dense(128, activation=activation_mode, input_shape=(input_dim,)), + tf.keras.layers.Dense(64, activation=activation_mode), + tf.keras.layers.Dense(32, activation=activation_mode), + tf.keras.layers.Dense(latent_dim, activation=activation_mode), + ] + ) + + decoder = tf.keras.models.Sequential( + [ + tf.keras.layers.Dense(64, activation=activation_mode, input_shape=(latent_dim,)), + tf.keras.layers.Dense(128, activation=activation_mode), + tf.keras.layers.Dense(256, activation=activation_mode), + tf.keras.layers.Dense(input_dim, activation=None), + ] + ) + + autoencoder = tf.keras.models.Model(inputs=encoder.input, outputs=decoder(encoder.output)) + autoencoder.compile(loss="mse", optimizer="adam") + model_history = autoencoder.fit(df, df, epochs=nr_of_epochs, batch_size=32, verbose=0) + if out_folder is not None: + make_loss_plot(model_history, out_folder, activation_mode) + embedding = encoder(df) + embedding = embedding.numpy() + return embedding + + +def run_tsne(df): + """ + Function for running t-SNE for dimensionality reduction of kmer counts + """ + print_timestamp("t-SNE") + t_sne_perplexity = 30 + df_row_count = df.shape[0] + if df_row_count < 2: + sys.stderr.write("Cannot run t-SNE because the kmer counts dataframe has only {} rows\n".format(df_row_count)) + else: + if t_sne_perplexity >= df_row_count: + t_sne_perplexity = round(df_row_count / 2) # t-SNE perplexity must be less than n_samples + if t_sne_perplexity < 1: + t_sne_perplexity = 1 + sys.stderr.write("Set t-SNE perplexity value to {}\n".format(t_sne_perplexity)) + + embedding = manifold.TSNE(n_components=2).fit_transform(df) + return embedding + + +def run_dim_reduction(df, selected_method, n_neighbors_setting=-1, autoencoder_epochs_count=-1, out_folder=None): + embedding = None + embedding_title = None + if selected_method == "pca": + embedding_title = "PCA" + print_timestamp(embedding_title) + pca = decomposition.PCA(n_components=2) + embedding = pca.fit_transform(df) + + elif selected_method == "umap": + embedding_title = "UMAP" + print_timestamp(embedding_title) + reducer = umap.UMAP( + random_state=123, n_neighbors=n_neighbors_setting, min_dist=0.1, n_components=2, metric="euclidean" + ) + embedding = reducer.fit_transform(df) + + elif selected_method == "t-sne": + embedding_title = "t-SNE" + embedding = run_tsne(df) + + elif selected_method == "isomap": + embedding_title = "isomap" + print_timestamp(embedding_title) + embedding = manifold.Isomap(n_neighbors=n_neighbors_setting, n_components=2).fit_transform(df) + + elif selected_method == "lle_standard": + embedding_title = "LLE standard" + print_timestamp(embedding_title) + embedding = manifold.LocallyLinearEmbedding( + n_neighbors=n_neighbors_setting, n_components=2, eigen_solver="dense", method="standard" + ).fit_transform(df) + + elif selected_method == "lle_hessian": + embedding_title = "LLE standard" + print_timestamp(embedding_title) + embedding = manifold.LocallyLinearEmbedding( + n_neighbors=n_neighbors_setting, n_components=2, 
eigen_solver="dense", method="hessian" + ).fit_transform(df) + + elif selected_method == "lle_modified": + embedding_title = "LLE modified" + print_timestamp(embedding_title) + embedding = manifold.LocallyLinearEmbedding( + n_neighbors=n_neighbors_setting, n_components=2, eigen_solver="dense", method="modified" + ).fit_transform(df) + + elif selected_method == "mds": + embedding_title = "MDS" + print_timestamp(embedding_title) + embedding = manifold.MDS(max_iter=100, n_init=1, n_components=2).fit_transform(df) + + elif selected_method == "se": + embedding_title = "SE" + print_timestamp(embedding_title) + embedding = manifold.SpectralEmbedding(n_neighbors=n_neighbors_setting, n_components=2).fit_transform(df) + + elif selected_method == "random_trees": + # https://scikit-learn.org/stable/auto_examples/manifold/plot_lle_digits.html#sphx-glr-auto-examples-manifold-plot-lle-digits-py + embedding_title = "random_trees" + print_timestamp(embedding_title) + hasher = ensemble.RandomTreesEmbedding(n_estimators=200, random_state=0, max_depth=5) + x_transformed = hasher.fit_transform(df) + pca = decomposition.TruncatedSVD(n_components=2) + embedding = pca.fit_transform(x_transformed) + + elif selected_method == "kernel_pca": + # https://www.tutorialspoint.com/scikit_learn/scikit_learn_dimensionality_reduction_using_pca.htm + embedding_title = "KernelPCA" + print_timestamp(embedding_title) + kernel_pca = decomposition.KernelPCA(n_components=2, kernel="sigmoid") + embedding = kernel_pca.fit_transform(df) + + elif selected_method == "pca_svd": + # https://www.tutorialspoint.com/scikit_learn/scikit_learn_dimensionality_reduction_using_pca.htm + embedding_title = "PCA with SVD solver" + print_timestamp(embedding_title) + pca = decomposition.PCA(n_components=2, svd_solver="randomized") + embedding = pca.fit_transform(df) + + elif selected_method == "autoencoder_sigmoid": + print_timestamp("Autoencoder sigmoid") + # https://ekamperi.github.io/machine%20learning/2021/01/21/encoder-decoder-model.html + embedding = run_autoencoder(df, autoencoder_epochs_count, "sigmoid", out_folder) + + elif selected_method == "autoencoder_linear": + # https://ekamperi.github.io/machine%20learning/2021/01/21/encoder-decoder-model.html + embedding_title = "Autoencoder linear" + print_timestamp(embedding_title) + embedding = run_autoencoder(df, autoencoder_epochs_count, "linear", out_folder) + + elif selected_method == "autoencoder_tanh": + # https://ekamperi.github.io/machine%20learning/2021/01/21/encoder-decoder-model.html + embedding_title = "Autoencoder tanh" + print_timestamp(embedding_title) + embedding = run_autoencoder(df, autoencoder_epochs_count, "tanh", out_folder) + + elif selected_method == "autoencoder_selu": + # https://ekamperi.github.io/machine%20learning/2021/01/21/encoder-decoder-model.html + embedding_title = "Autoencoder selu" + print_timestamp(embedding_title) + embedding = run_autoencoder(df, autoencoder_epochs_count, "selu", out_folder) + + elif selected_method == "autoencoder_relu": + # https://ekamperi.github.io/machine%20learning/2021/01/21/encoder-decoder-model.html + embedding_title = "Autoencoder relu" + print_timestamp(embedding_title) + embedding = run_autoencoder(df, autoencoder_epochs_count, "relu", out_folder) + + elif selected_method == "nmf": + # https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html + embedding_title = "Non-Negative Matrix Factorization" + print_timestamp(embedding_title) + nmf_model = decomposition.NMF(n_components=2, init="random", random_state=0) 
+        embedding = nmf_model.fit_transform(df)
+    return embedding, embedding_title
+
+
+def main(kmer_counts_file, out_folder, selected_methods, n_neighbors_setting, autoencoder_epochs_count):
+    reset_random_seeds()
+
+    df, seq_names = load_data(kmer_counts_file)
+    df_row_count = df.shape[0]
+    if df_row_count == 1:
+        sys.stderr.write(
+            "Skipping the dimensionality reduction of kmer counts, as the kmer counts table has only one row\n"
+        )
+        # Generate a placeholder file so that Nextflow still finds an output file when the input is too small for analysis
+        with open("EMPTY_kmers_dim_reduction_embeddings.csv", "w") as empty_file:
+            empty_file.write("FILE TOO SMALL FOR ANALYSIS")
+        sys.exit(0)
+
+    Path(out_folder).mkdir(parents=True, exist_ok=True)
+    selected_methods = selected_methods.split(",")
+
+    if autoencoder_epochs_count == -1:
+        autoencoder_epochs_count = df_row_count
+
+    embeddings_list = list()
+    for selected_method in selected_methods:
+        embedding, embedding_title = run_dim_reduction(
+            df,
+            selected_method,
+            n_neighbors_setting=n_neighbors_setting,
+            autoencoder_epochs_count=autoencoder_epochs_count,
+            out_folder=out_folder,
+        )
+        embedding_df = embedding_to_dataframe(embedding, seq_names, embedding_title)
+        embeddings_list.append(embedding_df)
+
+    out_df = reduce(
+        lambda left, right: pd.merge(left, right, on=["scaff"], how="outer"),  # Merge DataFrames in list
+        embeddings_list,
+    )
+    out_path = out_folder + "/kmers_dim_reduction_embeddings.csv"
+    out_df.to_csv(out_path, index=False)
+
+    os._exit(0)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("-v", "--version", action="version", version="1.0")
+    parser.add_argument("kmer_counts_file", type=str, help="Path to input CSV file with kmer counts")
+    parser.add_argument("out_folder", type=str, help="Path to folder where output files will be written")
+    parser.add_argument(
+        "--selected_methods",
+        type=str,
+        help="Comma separated string with the selected dimensionality reduction methods",
+        default="pca",
+    )
+    parser.add_argument(
+        "--n_neighbors_setting",
+        type=int,
+        help="n_neighbors parameter value for the methods that have this parameter (default: 13)",
+        default=13,
+    )
+    parser.add_argument(
+        "--autoencoder_epochs_count",
+        type=int,
+        help="Autoencoder epochs count (default: assign automatically as 3x number of sequences in input)",
+        default=-1,
+    )
+    args = parser.parse_args()
+    main(
+        args.kmer_counts_file,
+        args.out_folder,
+        args.selected_methods,
+        args.n_neighbors_setting,
+        args.autoencoder_epochs_count,
+    )
diff --git a/bin/kmer_count_dim_reduction_combine_csv.py b/bin/kmer_count_dim_reduction_combine_csv.py
new file mode 100755
index 0000000..e7a912d
--- /dev/null
+++ b/bin/kmer_count_dim_reduction_combine_csv.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+
+"""
+Script to combine kmer count dimensionality reduction embedding csv files.
+Adapted from Eerik Aunin (ea10@sanger.ac.uk), by William Eagles (we3@sanger.ac.uk)
+"""
+
+import pandas as pd
+import argparse
+from functools import reduce
+
+
+def main(embedding_paths, output_name):
+    # Load csv as dataframes.
+    dataframe_list = list()
+    for embedding_path in embedding_paths:
+        df = pd.read_csv(f"{embedding_path}/kmers_dim_reduction_embeddings.csv")
+        dataframe_list.append(df)
+
+    # Merge DataFrames in list
+    out_df = reduce(lambda left, right: pd.merge(left, right, on=["scaff"], how="outer"), dataframe_list)
+
+    # Output as csv.
+ out_df.to_csv(output_name, index=False) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("-v", "--version", action="version", version="1.0") + parser.add_argument( + "-i", "--kmer_counts_file", nargs="+", type=str, help="List of paths to input CSV file with kmer counts" + ) + parser.add_argument("-o", "--output_name", type=str, help="Path to folder where output file will be written") + args = parser.parse_args() + main(args.kmer_counts_file, args.output_name) diff --git a/bin/reformat_diamond_outfmt6.py b/bin/reformat_diamond_outfmt6.py index 8d88cf4..91c916f 100755 --- a/bin/reformat_diamond_outfmt6.py +++ b/bin/reformat_diamond_outfmt6.py @@ -15,7 +15,7 @@ import general_purpose_functions as gpf if sys.argv[1] == "-v": - print("1.0.0") + print("1.0") sys.exit() else: in_path = sys.argv[1] diff --git a/conf/test.config b/conf/test.config index 5ae71a6..ce291c2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '10.GB' max_time = '6.h' input = "${projectDir}/assets/github_testing/test.yaml" diff --git a/modules/local/get_kmer_counts.nf b/modules/local/get_kmer_counts.nf new file mode 100755 index 0000000..b0742fe --- /dev/null +++ b/modules/local/get_kmer_counts.nf @@ -0,0 +1,53 @@ +process GET_KMER_COUNTS { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::python=3.9 conda-forge::kcounter=0.1.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d9b9334c4a0777c7722cbcc301a10ddc8684a85f:796f1cea66b7720fa29583d1f6b90404f90dde2f-0' : + 'biocontainers/mulled-v2-d9b9334c4a0777c7722cbcc301a10ddc8684a85f:796f1cea66b7720fa29583d1f6b90404f90dde2f-0' }" + + input: + tuple val(meta), path(input_fasta) + val kmer_size + + output: + tuple val(meta), path( "*_kmer_counts.csv" ) , emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def KCOUNTER_VERSION = "0.1.1" + def prefix = args.ext.prefix ?: "${meta.id}" + """ + get_kmers_counts.py \\ + $input_fasta \\ + ${prefix}_kmer_counts.csv \\ + --kmer_size $kmer_size + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + kcounter: $KCOUNTER_VERSION + general_purpose_functions.py: \$(general_purpose_functions.py --version | cut -d' ' -f2) + get_kmers_counts.py: \$(get_kmers_counts.py --version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + def KCOUNTER_VERSION = "0.1.1" + def prefix = args.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_kmer_counts.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + kcounter: $KCOUNTER_VERSION + general_purpose_functions.py: \$(general_purpose_functions.py --version | cut -d' ' -f2) + get_kmers_counts.py: \$(get_kmers_counts.py --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/kmer_count_dim_reduction.nf b/modules/local/kmer_count_dim_reduction.nf new file mode 100755 index 0000000..f80b06b --- /dev/null +++ b/modules/local/kmer_count_dim_reduction.nf @@ -0,0 +1,65 @@ +process KMER_COUNT_DIM_REDUCTION { + + tag "$meta.id" + label 'process_medium' + + conda "conda-forge::python=3.9 
conda-forge::pandas=2.2.1 conda-forge::tensorflow=2.15.0 conda-forge::scikit-learn=1.4.1 conda-forge::umap-learn=0.5.5 conda-forge::matplotlib=3.8.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ac95cc1cb32439236d915b38af3e056ce8eb0375:34bd58763b84a3ea2f6c60b87b4858b2f80c070e-0' :
+        'biocontainers/mulled-v2-ac95cc1cb32439236d915b38af3e056ce8eb0375:34bd58763b84a3ea2f6c60b87b4858b2f80c070e-0' }"
+
+    input:
+    tuple val(meta), path(kmer_counts_file)
+    val dimensionality_reduction_method
+    val n_neighbors_setting
+    val autoencoder_epochs_count
+
+    output:
+    path '*_kmers_dim_reduction_embeddings.csv', emit: csv
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def UMAP_VERSION = "0.5.5"
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+
+    kmer_count_dim_reduction.py \\
+        $kmer_counts_file \\
+        ${prefix}_${dimensionality_reduction_method}_kmers_dim_reduction_embeddings.csv \\
+        --selected_methods $dimensionality_reduction_method \\
+        --n_neighbors_setting $n_neighbors_setting \\
+        --autoencoder_epochs_count $autoencoder_epochs_count
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python3 -c 'import pandas; print(pandas.__version__)')
+        tensorflow: \$(python3 -c 'import tensorflow; print(tensorflow.__version__)')
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+        umap-learn: $UMAP_VERSION
+        matplotlib: \$(python3 -c 'import matplotlib; print(matplotlib.__version__)')
+        kmer_count_dim_reduction.py: \$(kmer_count_dim_reduction.py --version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+
+    stub:
+    def UMAP_VERSION = "0.5.5"
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_kmers_dim_reduction_embeddings.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python3 --version | sed 's/Python //g')
+        pandas: \$(python3 -c 'import pandas; print(pandas.__version__)')
+        tensorflow: \$(python3 -c 'import tensorflow; print(tensorflow.__version__)')
+        scikit-learn: \$(python3 -c "import sklearn; print(sklearn.__version__)")
+        umap-learn: $UMAP_VERSION
+        matplotlib: \$(python3 -c 'import matplotlib; print(matplotlib.__version__)')
+        kmer_count_dim_reduction.py: \$(kmer_count_dim_reduction.py --version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/kmer_count_dim_reduction_combine_csv.nf b/modules/local/kmer_count_dim_reduction_combine_csv.nf
new file mode 100755
index 0000000..cdd2d00
--- /dev/null
+++ b/modules/local/kmer_count_dim_reduction_combine_csv.nf
@@ -0,0 +1,47 @@
+process KMER_COUNT_DIM_REDUCTION_COMBINE_CSV {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
+        'quay.io/biocontainers/pandas:1.5.2' }"
+
+    input:
+    tuple val(meta), path(input_files)
+
+    output:
+    path '*_kmers_dim_reduction_embeddings_combined.csv', emit: csv
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    kmer_count_dim_reduction_combine_csv.py \\
+        -o ${prefix}_kmers_dim_reduction_embeddings_combined.csv \\
+        -i $input_files
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python3 -c 'import pandas; print(pandas.__version__)')
+        kmer_count_dim_reduction_combine_csv.py: \$(kmer_count_dim_reduction_combine_csv.py --version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_kmers_dim_reduction_embeddings_combined.csv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        python: \$(python --version | sed 's/Python //g')
+        pandas: \$(python3 -c 'import pandas; print(pandas.__version__)')
+        kmer_count_dim_reduction_combine_csv.py: \$(kmer_count_dim_reduction_combine_csv.py --version | cut -d' ' -f2)
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index eb78343..422de16 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -12,6 +12,7 @@ params {
     // TODO nf-core: Specify your pipeline's command line flags
     // Input options
     input                      = null
+    outdir                     = "results"
     tracedir                   = "${outdir}/pipeline_info/"

     // MultiQC options
@@ -218,7 +219,7 @@ manifest {
     homePage        = 'https://github.com/sanger-tol/ascc'
     description     = """Pipeline for detecting cobionts and contaminants in genome assemblies"""
     mainScript      = 'main.nf'
-    nextflowVersion = '!>=23.04.0'
+    nextflowVersion = '!>=22.10.1'
     version         = '1.0dev'
     doi             = ''
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 68ba8f1..f93d416 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -195,7 +195,7 @@
         "tracedir": {
             "type": "string",
             "description": "Directory to keep pipeline Nextflow logs and reports.",
-            "default": "${params.outdir}/treeval_info",
+            "default": "results/pipeline_info/",
             "fa_icon": "fas fa-cogs",
             "hidden": true
         },
diff --git a/subworkflows/local/get_kmers_profile.nf b/subworkflows/local/get_kmers_profile.nf
new file mode 100755
index 0000000..de11712
--- /dev/null
+++ b/subworkflows/local/get_kmers_profile.nf
@@ -0,0 +1,96 @@
+#!/usr/bin/env nextflow
+
+//
+// MODULE IMPORT BLOCK
+//
+include { GET_KMER_COUNTS } from '../../modules/local/get_kmer_counts'
+include { KMER_COUNT_DIM_REDUCTION } from '../../modules/local/kmer_count_dim_reduction'
+include { KMER_COUNT_DIM_REDUCTION_COMBINE_CSV } from '../../modules/local/kmer_count_dim_reduction_combine_csv'
+
+workflow GET_KMERS_PROFILE {
+    take:
+    assembly_fasta // Channel [ val(meta), path(file) ]
+    kmer_size // Channel [ val(integer) ]
+    dimensionality_reduction_methods // Channel [ val(string) ]
+    n_neighbors_setting // Channel [ val(string) ]
+    autoencoder_epochs_count // Channel [ val(integer) ]
+
+    main:
+    ch_versions = Channel.empty()
+
+    //
+    // LOGIC: REFACTORING REFERENCE TUPLE
+    //
+    assembly_fasta
+        .map{ meta, file ->
+            tuple([id: meta.id,
+                single_end: true],
+                file
+            )
+        }
+        .set { modified_input }
+
+    //
+    // MODULE: PRODUCE KMER COUNTS (USING KCOUNTER)
+    //
+    GET_KMER_COUNTS (
+        modified_input, // val(meta), path(reads)
+        kmer_size // val kmer_size
+    )
+    ch_versions = ch_versions.mix(GET_KMER_COUNTS.out.versions)
+
+    //
+    // LOGIC: CREATE CHANNEL OF
LIST OF SELECTED METHODS + // + dimensionality_reduction_methods + .splitCsv(sep: ',') + .flatten() + .combine( GET_KMER_COUNTS.out.csv ) + .combine( n_neighbors_setting ) + .combine( autoencoder_epochs_count ) + .multiMap { dim_red_method, csv_meta, csv_val, nn_setting, ae_setting -> + kmer_csv : tuple ([ id: csv_meta.id, single_end: true], csv_val) + dimensionality_reduction_method : dim_red_method + n_neighbors_setting : nn_setting + autoencoder_epochs_count : ae_setting + } + .set{ ch_methods } + + // + // MODULE: DIMENSIONALITY REDUCTION OF KMER COUNTS, USING SPECIFIED METHODS + // + KMER_COUNT_DIM_REDUCTION ( + ch_methods.kmer_csv, // val(meta), path(kmer_counts) + ch_methods.dimensionality_reduction_method, // val dimensionality_reduction_method + ch_methods.n_neighbors_setting, // val n_neighbors_setting + ch_methods.autoencoder_epochs_count // val autoencoder_epochs_count + ) + ch_versions = ch_versions.mix(KMER_COUNT_DIM_REDUCTION.out.versions) + + // + // LOGIC: PREPARING INPUT TO COMBINE OUTPUT CSV FOR EACH METHOD + // + KMER_COUNT_DIM_REDUCTION.out.csv + .collect() + .map { file -> + tuple ( + [ id : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID + file + ) + } + .set { collected_files_for_combine } + + collected_files_for_combine.view() + + // + // MODULE: COMBINE OUTPUTS OF MULTIPLE METHODS + // + KMER_COUNT_DIM_REDUCTION_COMBINE_CSV ( + collected_files_for_combine + ) + ch_versions = ch_versions.mix(KMER_COUNT_DIM_REDUCTION_COMBINE_CSV.out.versions) + + emit: + combined_csv = KMER_COUNT_DIM_REDUCTION_COMBINE_CSV.out.csv + versions = ch_versions.ifEmpty(null) +} diff --git a/subworkflows/local/run_fcsgx.nf b/subworkflows/local/run_fcsgx.nf index a748f67..3e8c6d4 100644 --- a/subworkflows/local/run_fcsgx.nf +++ b/subworkflows/local/run_fcsgx.nf @@ -25,7 +25,8 @@ workflow RUN_FCSGX { reference .combine( taxid ) .map { it -> - tuple ([taxid: it[2]], + tuple ([ id: it[0].id, + taxid: it[2] ], it[1]) } .set { reference_with_taxid } diff --git a/subworkflows/local/run_read_coverage.nf b/subworkflows/local/run_read_coverage.nf index 377d17d..001d2f4 100644 --- a/subworkflows/local/run_read_coverage.nf +++ b/subworkflows/local/run_read_coverage.nf @@ -12,17 +12,17 @@ workflow RUN_READ_COVERAGE { reference_tuple // Channel [ val(meta), path(file) ] assembly_path // Channel path(file) pacbio_tuple // Channel [ val(meta), val( str ) ] - platform // Channel val( str ) + platform // Channel val( str ) main: ch_versions = Channel.empty() ch_align_bam = Channel.empty() - + // // LOGIC: CHECK IF THE INPUT READ FILE IS PAIRED END OR SINGLE END BASED ON THE READ PLATFORM, THEN RUN MINIMAP // - if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) { + if ( platform.filter { it == "hifi" } || platform.filter { it == "clr" } || platform.filter { it == "ont" } ) { SE_MAPPING ( reference_tuple, assembly_path, @@ -34,7 +34,7 @@ workflow RUN_READ_COVERAGE { .mix( SE_MAPPING.out.mapped_bam ) .set { merged_bam } } - else if ( platform.filter { it == "illumina" } ) { + else if ( platform.filter { it == "illumina" } ) { PE_MAPPING ( reference_tuple, @@ -73,7 +73,7 @@ workflow RUN_READ_COVERAGE { ) ch_versions = ch_versions.mix( SAMTOOLS_DEPTH.out.versions ) - + // // MODULE: COMPUTE THE AVERAGE COVERAGE // diff --git a/subworkflows/local/run_vecscreen.nf b/subworkflows/local/run_vecscreen.nf index 27a38b1..fce4395 100644 --- a/subworkflows/local/run_vecscreen.nf +++ b/subworkflows/local/run_vecscreen.nf @@ -24,6 +24,8 @@ 
workflow RUN_VECSCREEN { // // MODULE: RUNS NCBI VECSCREEN // + vecscreen_database_tuple.view() + NCBITOOLS_VECSCREEN( CHUNK_ASSEMBLY_FOR_VECSCREEN.out.chunked_assembly, vecscreen_database_tuple diff --git a/subworkflows/local/se_mapping.nf b/subworkflows/local/se_mapping.nf index 1c3f69c..7ce3657 100644 --- a/subworkflows/local/se_mapping.nf +++ b/subworkflows/local/se_mapping.nf @@ -72,7 +72,6 @@ workflow SE_MAPPING { se_input.bool_cigar_bam ) ch_bams = MINIMAP2_ALIGN_SE.out.bam - ch_bams .map { meta, file -> @@ -110,7 +109,7 @@ process GrabFiles { tuple val(meta), path("in") output: - tuple val(meta), path("in/*.{fa,fasta}.{gz}") + tuple val(meta), path("in/*.{fa,fasta,fna}.{gz}") "true" } \ No newline at end of file diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 964f29d..6ed9ea4 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -21,7 +21,7 @@ workflow YAML_INPUT { .multiMap { data -> assembly_title: ( data.assembly_title ) reads_path: ( data.reads_path ) - reads_type: ( data.reads_type ) + reads_type: ( data.reads_type ) assembly_path: ( file(data.assembly_path) ) pacbio_barcodes: ( file(data.pacbio_barcodes) ) pacbio_multiplexing_barcode_names: ( data.pacbio_multiplexing_barcode_names) @@ -33,16 +33,17 @@ workflow YAML_INPUT { reference_proteomes: ( data.reference_proteomes ) nt_kraken_db_path: ( data.nt_kraken_db_path ) kmer_len: ( data.kmer_len ) + dimensionality_reduction_methods: ( data.dimensionality_reduction_methods ) fcs_gx_database_path: ( data.fcs_gx_database_path ) ncbi_taxonomy_path: ( data.ncbi_taxonomy_path ) ncbi_rankedlineage_path: ( data.ncbi_rankedlineage_path ) - ncbi_accessionids: ( data.ncbi_accessionids_folder ) + ncbi_accessionids: ( data.ncbi_accessionids_folder ) busco_lineages_folder: ( data.busco_lineages_folder ) seqkit_values: ( data.seqkit ) diamond_uniprot_database_path: ( data.diamond_uniprot_database_path ) diamond_nr_database_path: ( data.diamond_nr_database_path ) vecscreen_database_path: ( data.vecscreen_database_path ) - + neighbours: ( data.n_neighbours ) } .set{ group } @@ -127,10 +128,13 @@ workflow YAML_INPUT { vecscreen_database_path = ch_vecscreen seqkit_sliding = seqkit.sliding_value seqkit_window = seqkit.window_value + dimensionality_reduction_methods = group.dimensionality_reduction_methods mito_tuple = ch_mito mito_var = "mitochondrial_genome" plastid_tuple = ch_plastid plastid_var = "plastid_genome" + kmer_len = group.kmer_len + n_neighbours = group.neighbours versions = ch_versions.ifEmpty(null) } diff --git a/workflows/ascc.nf b/workflows/ascc.nf index 014b34a..a80033d 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -32,6 +32,7 @@ include { RUN_FCSADAPTOR } from '../subworkflows/ include { RUN_NT_KRAKEN } from '../subworkflows/local/run_nt_kraken' include { RUN_FCSGX } from '../subworkflows/local/run_fcsgx' include { PACBIO_BARCODE_CHECK } from '../subworkflows/local/pacbio_barcode_check' +include { GET_KMERS_PROFILE } from '../subworkflows/local/get_kmers_profile' include { RUN_READ_COVERAGE } from '../subworkflows/local/run_read_coverage' include { RUN_VECSCREEN } from '../subworkflows/local/run_vecscreen' include { ORGANELLAR_BLAST as PLASTID_ORGANELLAR_BLAST } from '../subworkflows/local/organellar_blast' @@ -63,6 +64,9 @@ workflow ASCC { main: ch_versions = Channel.empty() + ch_out_merge = Channel.empty() + + workflow_steps = params.steps.split(",") input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -80,6 +84,7 @@ workflow ASCC 
{ GC_CONTENT ( YAML_INPUT.out.reference_tuple ) + ch_out_merge = ch_out_merge.mix(GC_CONTENT.out.txt) ch_versions = ch_versions.mix(GC_CONTENT.out.versions) // @@ -92,12 +97,40 @@ workflow ASCC { ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) // - // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA + // SUBWORKFLOW: COUNT KMERS, THEN REDUCE DIMENSIONS USING SELECTED METHODS // - EXTRACT_TIARA_HITS ( + + if ( workflow_steps.contains('kmers') || workflow_steps.contains('ALL')) { + GENERATE_GENOME.out.reference_tuple - ) - ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) + .map { meta, file -> + tuple ( + meta, + file, + file.countFasta() * 3 + ) + } + .set {autoencoder_epochs_count} + + GET_KMERS_PROFILE ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.kmer_len, + YAML_INPUT.out.dimensionality_reduction_methods, + YAML_INPUT.out.n_neighbours, + autoencoder_epochs_count.map{it -> it[2]} + ) + ch_versions = ch_versions.mix(GET_KMERS_PROFILE.out.versions) + } + + // + // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA + // + if ( workflow_steps.contains('tiara') ) { + EXTRACT_TIARA_HITS ( + GENERATE_GENOME.out.reference_tuple + ) + ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) + } // // LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE @@ -117,104 +150,136 @@ workflow ASCC { // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST // - EXTRACT_NT_BLAST ( - modified_input, - YAML_INPUT.out.nt_database, - YAML_INPUT.out.ncbi_accessions, - YAML_INPUT.out.ncbi_rankedlineage_path - ) - ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) - - // - // LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH - // - YAML_INPUT.out.mito_tuple - .branch { meta, check -> - valid: check != "NO MITO" - invalid: check == "NO MITO" - } - .set { mito_check } + if ( workflow_steps.contains('nt_blast') || workflow_steps.contains('ALL') ) { + EXTRACT_NT_BLAST ( + modified_input, + YAML_INPUT.out.nt_database, + YAML_INPUT.out.ncbi_accessions, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + ch_versions = ch_versions.mix(EXTRACT_NT_BLAST.out.versions) + } + if ( workflow_steps.contains('mito') || workflow_steps.contains('ALL') ) { + // + // LOGIC: CHECK WHETHER THERE IS A MITO AND BRANCH + // + YAML_INPUT.out.mito_tuple + .branch { meta, check -> + valid: check != "NO MITO" + invalid: check == "NO MITO" + } + .set { mito_check } + + + // + // SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME + // + MITO_ORGANELLAR_BLAST ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.mito_var, + mito_check.valid + ) + ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions) + } - // - // SUBWORKFLOW: BLASTING FOR MITO ASSEMBLIES IN GENOME - // - MITO_ORGANELLAR_BLAST ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.mito_var, - mito_check.valid - ) - ch_versions = ch_versions.mix(MITO_ORGANELLAR_BLAST.out.versions) + if ( workflow_steps.contains('chloro') || workflow_steps.contains('ALL') ) { + + // + // LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH + // + YAML_INPUT.out.plastid_tuple + .branch { meta, check -> + valid: check != "NO PLASTID" + invalid: check == "NO PLASTID" + } + .set { plastid_check } + + // + // SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME + // + PLASTID_ORGANELLAR_BLAST ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.plastid_var, + plastid_check.valid + ) + ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions) + } - // - // LOGIC: CHECK WHETHER THERE IS A PLASTID AND BRANCH - // - YAML_INPUT.out.plastid_tuple - .branch 
{ meta, check -> - valid: check != "NO PLASTID" - invalid: check == "NO PLASTID" - } - .set { plastid_check } - - // - // SUBWORKFLOW: BLASTING FOR PLASTID ASSEMBLIES IN GENOME - // - PLASTID_ORGANELLAR_BLAST ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.plastid_var, - plastid_check.valid - ) - ch_versions = ch_versions.mix(PLASTID_ORGANELLAR_BLAST.out.versions) // // SUBWORKFLOW: // - RUN_FCSADAPTOR ( - YAML_INPUT.out.reference_tuple - ) - ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) - + if ( workflow_steps.contains('fcs_adapt') || workflow_steps.contains('ALL') ) { + RUN_FCSADAPTOR ( + YAML_INPUT.out.reference_tuple + ) + ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + } // // SUBWORKFLOW: // - RUN_FCSGX ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.fcs_gx_database_path, - YAML_INPUT.out.taxid, - YAML_INPUT.out.ncbi_rankedlineage_path - ) - ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + if ( workflow_steps.contains('fcsgx') || workflow_steps.contains('ALL') ) { + RUN_FCSGX ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.fcs_gx_database_path, + YAML_INPUT.out.taxid, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + } // // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA // - PACBIO_BARCODE_CHECK ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.pacbio_tuple, - YAML_INPUT.out.pacbio_barcodes, - YAML_INPUT.out.pacbio_multiplex_codes - ) - ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + if ( workflow_steps.contains('barcodes') || workflow_steps.contains('ALL') ) { + PACBIO_BARCODE_CHECK ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.pacbio_tuple, + YAML_INPUT.out.pacbio_barcodes, + YAML_INPUT.out.pacbio_multiplex_codes + ) + ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + } // // SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE // - RUN_READ_COVERAGE ( - YAML_INPUT.out.reference_tuple, - YAML_INPUT.out.assembly_path, - YAML_INPUT.out.pacbio_tuple, - YAML_INPUT.out.reads_type - ) - ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions) + if ( workflow_steps.contains('coverage') || workflow_steps.contains('ALL') ) { + RUN_READ_COVERAGE ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.assembly_path, + YAML_INPUT.out.pacbio_tuple, + YAML_INPUT.out.reads_type + ) + ch_versions = ch_versions.mix(RUN_READ_COVERAGE.out.versions) + } // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS // - RUN_VECSCREEN ( - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.vecscreen_database_path - ) - ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions) + if ( workflow_steps.contains('vecscreen') || workflow_steps.contains('ALL') ) { + RUN_VECSCREEN ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.vecscreen_database_path + ) + ch_versions = ch_versions.mix(RUN_VECSCREEN.out.versions) + } + + // + // SUBWORKFLOW: Run the kraken classifier + // + if ( workflow_steps.contains('kraken') || workflow_steps.contains('ALL') ) { + RUN_NT_KRAKEN( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.nt_kraken_db_path, + YAML_INPUT.out.ncbi_rankedlineage_path + ) + } + + // mix the outputs of the outpuutting process so that we can + // insert them into the one process to create the btk and the merged report + // much like the versions channel // // SUBWORKFLOW: Collates version data from prior subworflows
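For orientation, the kmer-profile steps that this PR wires together (bin/get_kmers_counts.py feeding bin/kmer_count_dim_reduction.py via the GET_KMERS_PROFILE subworkflow) boil down to: count canonical kmers per sequence, normalise by sequence length, scale the counts, project them to two dimensions, and write the embeddings with a "scaff" column for later merging. The following is a minimal, hypothetical Python sketch of that flow showing only the PCA branch; read_fasta() is a simplified stand-in for general_purpose_functions.read_fasta_in_chunks(), and the file names are placeholders rather than paths used by the pipeline.

#!/usr/bin/env python3
# Sketch of the kmer-count -> dimensionality-reduction flow (PCA branch only).
import kcounter
import pandas as pd
from sklearn import decomposition, preprocessing


def read_fasta(fasta_path):
    """Yield (header, sequence) tuples from a FASTA file (simplified stand-in)."""
    header, seq_chunks = None, []
    with open(fasta_path) as fasta:
        for line in fasta:
            line = line.strip()
            if line.startswith(">"):
                if header is not None:
                    yield header, "".join(seq_chunks)
                header, seq_chunks = line[1:].split()[0], []
            elif line:
                seq_chunks.append(line)
    if header is not None:
        yield header, "".join(seq_chunks)


def kmer_profile(fasta_path, out_path, kmer_size=7):
    # Relative canonical kmer counts per sequence, as in get_kmers_counts.py
    rows = []
    for header, seq in read_fasta(fasta_path):
        seq = seq.upper()
        counts = kcounter.count_kmers(seq, kmer_size, canonical_kmers=True)
        row = {"header": header}
        row.update({kmer: count / len(seq) for kmer, count in counts.items()})
        rows.append(row)
    df = pd.DataFrame(rows).fillna(0).set_index("header")

    # Scale, then reduce to two dimensions with PCA (the "pca" branch of run_dim_reduction);
    # the real script skips this step when only one sequence is present.
    scaled = preprocessing.MinMaxScaler().fit_transform(df)
    embedding = decomposition.PCA(n_components=2).fit_transform(scaled)

    out_df = pd.DataFrame(embedding, columns=["embedding_x_PCA", "embedding_y_PCA"])
    out_df["scaff"] = df.index.values
    out_df.to_csv(out_path, index=False)


if __name__ == "__main__":
    kmer_profile("assembly.fa", "kmers_dim_reduction_embeddings.csv")

In the pipeline itself each method listed in dimensionality_reduction_methods runs as a separate KMER_COUNT_DIM_REDUCTION task, and KMER_COUNT_DIM_REDUCTION_COMBINE_CSV outer-merges the per-method CSVs on the "scaff" column.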