diff --git a/.github/workflows/ci.config b/.github/workflows/ci.config
new file mode 100644
index 00000000..27cc59ae
--- /dev/null
+++ b/.github/workflows/ci.config
@@ -0,0 +1,15 @@
+
+process {
+    executor='slurm'
+    time = '7 d'
+    memory = '4 GB'
+}
+
+singularity {
+    enabled = true
+}
+
+conda {
+    createTimeout = "30 min"
+    useMamba = true
+}
diff --git a/.github/workflows/nextflow-linter.yaml b/.github/workflows/nextflow-linter.yaml
new file mode 100644
index 00000000..7b68f59b
--- /dev/null
+++ b/.github/workflows/nextflow-linter.yaml
@@ -0,0 +1,55 @@
+name: nf-core linting
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-java@v2
+        with:
+          distribution: 'adopt'
+          java-version: '11'
+
+      - name: install Nextflow
+        run: |
+          wget -qO- https://get.nextflow.io | bash
+          chmod +x nextflow
+          mkdir -p $HOME/.local/bin
+          mv nextflow $HOME/.local/bin/
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+      - name: set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+
+      - name: install nf-core tools
+        run: |
+          python -m pip install --upgrade pip
+          pip install nf-core
+
+      - name: check Nextflow version
+        run: nextflow -version
+
+      # https://nf-co.re/tools/docs/latest/pipeline_lint_tests/
+      - name: create .nf-core.yml
+        run: |
+          cat << EOF > .nf-core.yml
+          repository_type: pipeline
+          lint:
+            actions_awsfulltest: False
+            actions_awstest: False
+            multiqc_config: False
+            schema_lint: False
+            schema_params: False
+          EOF
+
+      - name: run nf-core lint
+        run: nf-core pipelines lint --dir .
diff --git a/README.md b/README.md
index 06bcde9e..0fc91ff7 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,26 @@
 # scxa-tertiary-workflow
 Tertiary component for SCXA workflows
+
+# How to run the workflow for tertiary analysis
+## Prepare data
+```
+bash scripts/data_prep.sh <exp_id> [output path]
+```
+## Run for plate
+```
+nextflow run main.nf --slurm -resume --dir_path <exp dir path> [--output_path <output path>] [--scanpy_scripts_container <container>]
+```
+## Run for droplet
+```
+nextflow run main.nf --slurm -resume --dir_path <exp dir path> --technology droplet [--output_path <output path>] [--scanpy_scripts_container <container>]
+```
+
+If `--output_path` is not specified, results are written to the `<dir_path>/results` directory.
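+
+## Example
+
+A complete run for a hypothetical droplet experiment (the `E-MTAB-0000` accession and paths below are placeholders, not real locations):
+```
+bash scripts/data_prep.sh E-MTAB-0000 /scratch/tertiary
+nextflow run main.nf --slurm -resume --dir_path /scratch/tertiary/E-MTAB-0000 --technology droplet
+```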
diff --git a/main.nf b/main.nf
index 8b137891..afe544ef 100644
--- a/main.nf
+++ b/main.nf
@@ -1 +1,845 @@
+#!/usr/bin/env nextflow
+nextflow.enable.dsl=2
+
+params.scanpy_scripts_container = "quay.io/biocontainers/scanpy-scripts:1.9.301--pyhdfd78af_0"
+params.technology = "plate"
+params.batch_variable = ""
+params.representation = "X_pca"
+params.dir_path = "."
+params.result_dir_path = params.output_path ?: params.dir_path + "/results"
+params.celltype_field = 'NO_CELLTYPE_FIELD'
+params.neighbor_values = ['3', '5', '10', '15', '20', '25', '30', '50', '100']
+params.perplexity_values = ['1', '5', '10', '15', '20', '25', '30', '35', '40', '45', '50']
+params.resolution_values = ['0.1', '0.3', '0.5', '0.7', '1.0', '2.0', '3.0', '4.0', '5.0']
+params.slotname = "louvain_resolution"
+params.clustering_slotname = params.resolution_values.collect { params.slotname + "_" + it }
+params.merged_group_slotname = params.clustering_slotname.collect { it + params.celltype_field }
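+// With the defaults above, clustering_slotname contains entries such as
+// "louvain_resolution_0.1" ... "louvain_resolution_5.0", and
+// merged_group_slotname appends the celltype field to each of them.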
+
+log.info """
+===============================
+WORKFLOW PARAMETER VALUES
+===============================
+EXP dir path: ${params.dir_path}
+Selected technology: ${params.technology}
+Results dir path: ${params.result_dir_path}
+celltype_field: ${params.celltype_field}
+neighbor_values: ${params.neighbor_values}
+perplexity_values: ${params.perplexity_values}
+resolution_values: ${params.resolution_values}
+slotname: ${params.slotname}
+clustering_slotname: ${params.clustering_slotname}
+merged_group_slotname: ${params.merged_group_slotname}
+batch_variable: ${params.batch_variable}
+representation: ${params.representation}
+===============================
+"""
+
+/*
+ * Column_rearrange_1: Keeps only the specified column and removes the header
+ */
+process Column_rearrange_1 {
+    input:
+    path genemeta
+    val col
+
+    output:
+    path 'filtered_genemeta.txt'
+
+    script:
+    """
+    # Find the column number of the specified column name
+    col_num=\$(head -n1 "$genemeta" | tr '\\t' '\\n' | grep -n "^$col\$" | cut -d: -f1)
+
+    # If the column is not found, raise an error
+    if [[ -z "\$col_num" ]]; then
+        echo "Error: Column '$col' not found in $genemeta" >&2
+        exit 1
+    fi
+
+    # Extract the column (without the header)
+    tail -n +2 "$genemeta" | cut -f\$col_num > filtered_genemeta.txt
+    """
+}
+
+/*
+ * Column_rearrange_2: Keeps only the two specified columns and removes the header
+ */
+process Column_rearrange_2 {
+    input:
+    path genemeta
+    val col1
+    val col2
+
+    output:
+    path 'filtered_genemeta_2.txt'
+
+    script:
+    """
+    # Find the column numbers of the two specified column names
+    col_num_1=\$(head -n1 "$genemeta" | tr '\\t' '\\n' | grep -n "^$col1\$" | cut -d: -f1)
+    col_num_2=\$(head -n1 "$genemeta" | tr '\\t' '\\n' | grep -n "^$col2\$" | cut -d: -f1)
+
+    # If either column is not found, raise an error
+    if [[ -z "\$col_num_1" || -z "\$col_num_2" ]]; then
+        echo "Error: Column '$col1' or '$col2' not found in $genemeta" >&2
+        exit 1
+    fi
+
+    # Extract both columns (without the header)
+    tail -n +2 "$genemeta" | cut -f\$col_num_1,\$col_num_2 > filtered_genemeta_2.txt
+    """
+}
+
+/*
+ * mergeGeneFiles: Merges the gene file with the filtered gene metadata on column 1, keeping gene ID and gene name
+ */
+process mergeGeneFiles {
+    input:
+    path gene
+    path filtered_genemeta
+
+    output:
+    path 'merged_genemeta.tsv'
+
+    script:
+    """
+    # Sort both files by the first column for join compatibility
+    sort -k1,1 "$gene" > sorted_gene.txt
+    sort -k1,1 "$filtered_genemeta" > sorted_genemeta.txt
+
+    # Perform a left join to keep every gene from the gene file
+    join -a 1 -t \$'\t' -o 0,1.2,2.2 sorted_gene.txt sorted_genemeta.txt | cut -f1,3 > merged_genemeta.tsv
+    """
+}
+
+process scanpy_read_10x {
+    container params.scanpy_scripts_container
+
+    input:
+    path matrix
+    path genes
+    path barcodes
+    path cellmeta
+    path genemeta
+
+    output:
+    path 'anndata.h5ad'
+
+    script:
+    """
+    # matrix.mtx and barcodes.tsv are already staged under their expected names;
+    # only the merged gene table needs to be linked as genes.tsv
+    ln -s $genes genes.tsv
+
+    export PYTHONIOENCODING='utf-8'
+
+    scanpy-read-10x --input-10x-mtx ./ \
+        --var-names 'gene_ids' \
+        --extra-obs $cellmeta \
+        --extra-var $genemeta \
+        --show-obj stdout \
+        --output-format anndata \
+        'anndata.h5ad'
+    """
+}
+
+process scanpy_multiplet_scrublet {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    val batch_variable
+
+    output:
+    path 'scrublet.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    if [ -z "$batch_variable" ]; then
+        scanpy-cli multiplet scrublet \
+            --input-format 'anndata' \
+            --output-format 'anndata' \
+            $anndata \
+            scrublet.h5ad
+    else
+        scanpy-cli multiplet scrublet \
+            --input-format 'anndata' \
+            --output-format 'anndata' \
+            --batch-key "$batch_variable" \
+            $anndata \
+            scrublet.h5ad
+    fi
+    """
+}
+
+process scanpy_plot_scrublet {
+    publishDir params.result_dir_path, mode: 'copy', pattern: 'scrublet.png'
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path 'scrublet.png'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-cli plot scrublet \
+        --input-format "anndata" \
+        --scale-hist-obs "linear" \
+        --scale-hist-sim "linear" \
+        $anndata \
+        scrublet.png
+    """
+}
+
+process scanpy_filter_cells {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    val category
+
+    output:
+    path 'filtered_cell_anndata.h5ad'
+
+    script:
+    """
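+    # Droplet runs pass a predicted_doublet category filter and use a lower
+    # n_counts threshold (750); plate runs keep the 1500 threshold with no
+    # category filter.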
+    n_counts=1500
+    if [[ -n "$category" ]]; then
+        n_counts=750
+    fi
+
+    export PYTHONIOENCODING='utf-8'
+    scanpy-filter-cells --gene-name 'gene_symbols' \
+        --param 'c:n_counts' \$n_counts 1000000000.0 \
+        --param 'c:pct_counts_mito' 0.0 0.35 \
+        --input-format 'anndata' $anndata \
+        --show-obj stdout \
+        --output-format anndata 'filtered_cell_anndata.h5ad' \
+        --export-mtx ./ \
+        $category
+    """
+}
+
+process scanpy_filter_genes {
+    publishDir "${params.result_dir_path}/matrices/raw_filtered", mode: 'copy', pattern: 'matrix.mtx'
+    publishDir "${params.result_dir_path}/matrices/raw_filtered", mode: 'copy', pattern: 'barcodes.tsv'
+    publishDir "${params.result_dir_path}/matrices/raw_filtered", mode: 'copy', pattern: 'genes.tsv'
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    path genes
+
+    output:
+    path 'filtered_gene_anndata.h5ad'
+    path 'matrix.mtx'
+    path 'barcodes.tsv'
+    path 'genes.tsv'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-filter-genes \
+        --param 'g:n_cells' 3.0 1000000000.0 \
+        --subset 'g:index' \
+        $genes \
+        --input-format 'anndata' $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'filtered_gene_anndata.h5ad' \
+        --export-mtx ./
+    """
+}
+
+process normalise_data {
+    publishDir "${params.result_dir_path}/matrices/filtered_normalised", mode: 'copy', pattern: 'matrix.mtx'
+    publishDir "${params.result_dir_path}/matrices/filtered_normalised", mode: 'copy', pattern: 'barcodes.tsv'
+    publishDir "${params.result_dir_path}/matrices/filtered_normalised", mode: 'copy', pattern: 'genes.tsv'
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path 'normalised_anndata.h5ad'
+    path 'matrix.mtx'
+    path 'barcodes.tsv'
+    path 'genes.tsv'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-normalise-data \
+        --no-log-transform \
+        --normalize-to '1000000.0' \
+        --input-format 'anndata' $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'normalised_anndata.h5ad' \
+        --export-mtx ./
+    """
+}
+
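+/*
+ * normalise_internal_data: Same normalisation as normalise_data, but keeps
+ * the default log transform and is used only by downstream internal steps
+ * (no matrix export).
+ */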
+process normalise_internal_data {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path 'normalised_internal_anndata.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-normalise-data \
+        --normalize-to '1000000.0' \
+        --input-format 'anndata' $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'normalised_internal_anndata.h5ad'
+    """
+}
+
+process find_variable_genes {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    val batch_variable
+
+    output:
+    path 'variable_genes.h5ad'
+
+    script:
+    """
+    batch_variable_tag=""
+    if [[ -n "$batch_variable" ]]; then
+        batch_variable_tag="--batch-key $batch_variable"
+    fi
+
+    export PYTHONIOENCODING='utf-8'
+    scanpy-find-variable-genes \
+        --flavor 'seurat' \
+        --mean-limits 0.0125 1000000000.0 \
+        --disp-limits 0.5 50.0 \
+        --span 0.3 \
+        --n-bins '20' \
+        \$batch_variable_tag \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata 'variable_genes.h5ad'
+    """
+}
+
+process scale_data {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path 'scaled_anndata.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-scale-data \
+        --input-format "anndata" \
+        --output-format "anndata" \
+        $anndata \
+        'scaled_anndata.h5ad'
+
+    """
+}
+
+process run_pca {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path 'PCA.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-run-pca \
+        --no-zero-center \
+        --svd-solver 'arpack' \
+        --random-state '1234' \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'PCA.h5ad'
+    """
+}
+
+process harmony_batch {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    val batch_variable
+    output:
+    path 'harmony.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    if [[ -n "$batch_variable" ]]; then
+        scanpy-integrate harmony \
+            --batch-key $batch_variable \
+            --basis 'X_pca' \
+            --adjusted-basis 'X_pca_harmony' \
+            --input-format 'anndata' \
+            $anndata \
+            --show-obj stdout \
+            --output-format anndata \
+            'harmony.h5ad'
+    else
+        echo "No batch variable passed; passing the original input through unchanged."
+
+        cp $anndata 'harmony.h5ad'
+    fi
+
+    """
+}
+
+process neighbors {
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+    val representation
+    output:
+    path 'neighbors.h5ad'
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-neighbors \
+        --n-neighbors 15 \
+        --method 'umap' \
+        --metric 'euclidean' \
+        --random-state '0' \
+        --use-rep $representation \
+        --n-pcs '50' \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'neighbors.h5ad'
+
+    """
+}
+
+process neighbors_for_umap {
+    container params.scanpy_scripts_container
+
+
+    input:
+    tuple path(anndata), val(n_neighbors)
+    val representation
+    output:
+    path "neighbors_${n_neighbors}.h5ad"
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-neighbors \
+        --n-neighbors $n_neighbors \
+        --key-added 'neighbors_n_neighbors_${n_neighbors}' \
+        --method 'umap' \
+        --metric 'euclidean' \
+        --random-state '0' \
+        --use-rep $representation \
+        --n-pcs '50' \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'neighbors_${n_neighbors}.h5ad'
+
+    """
+}
+
+process find_clusters {
+    publishDir "${params.result_dir_path}/clusters", mode: 'copy', pattern: 'clusters_resolution_*.tsv'
+    container params.scanpy_scripts_container
+
+    input:
+    tuple path(anndata), val(resolution)
+    output:
+    path "clusters_${resolution}.h5ad"
+    path "clusters_resolution_${resolution}.tsv"
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-find-cluster louvain \
+        --neighbors-key 'neighbors' \
+        --key-added 'louvain_resolution_${resolution}' \
+        --resolution ${resolution} \
+        --random-state '1234' \
+        --directed \
+        --export-cluster output.tsv \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'clusters_${resolution}.h5ad'
+
+    mv 'output.tsv' 'clusters_resolution_${resolution}.tsv'
+    """
+}
+
+process restore_unscaled {
+    container params.scanpy_scripts_container
+
+    input:
+    tuple path(anndata), path(normalise_internal_data)
+
+    output:
+    path "restore_unscaled_output_${anndata}.h5"
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    ln -s $anndata input.h5
+    ln -s $normalise_internal_data r_source.h5
+    python ${projectDir}/scripts/restore_unscaled.py
+    mv output.h5 'restore_unscaled_output_${anndata}.h5'
+    """
+}
+
+process find_markers {
+    publishDir "${params.result_dir_path}/markers", mode: 'copy', pattern: 'markers_*.tsv'
+    errorStrategy 'ignore'
+    container params.scanpy_scripts_container
+
+    input:
+    tuple path(anndata), val(merged_group_slotname)
+
+    output:
+    path "markers_${merged_group_slotname}.h5ad"
+    path "markers_*.tsv"
+
+    script:
+    """
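+    # Strip the slotname prefix to recover the suffix (a resolution value or
+    # the celltype field) used to name the exported marker table below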
+    VAR="$merged_group_slotname"
+    PREFIX="${params.slotname}_"
+    echo \$VAR
+    echo \$PREFIX
+    n_number="\${VAR#\$PREFIX}"
+    echo \$n_number
+
+    export PYTHONIOENCODING='utf-8'
+
+    scanpy-find-markers \
+        --save 'markers_${merged_group_slotname}.tsv' \
+        --n-genes '100' \
+        --groupby '${merged_group_slotname}' \
+        --key-added 'markers_${merged_group_slotname}' \
+        --method 'wilcoxon' \
+        --use-raw \
+        --reference 'rest' \
+        --filter-params 'min_in_group_fraction:0.0,max_out_group_fraction:1.0,min_fold_change:1.0' \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        "markers_${merged_group_slotname}.h5ad" \
+        && mv "markers_${merged_group_slotname}.tsv" "markers_resolution_\${n_number}.tsv"
+    """
+}
+
+process run_umap {
+    publishDir "${params.result_dir_path}/umap", mode: 'copy', pattern: 'umap_n_neighbors_*.tsv'
+
+    errorStrategy 'ignore'
+
+    container params.scanpy_scripts_container
+
+    input:
+    path anndata
+
+    output:
+    path "umap_*.h5ad"
+    path "umap_n_neighbors_*.tsv"
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    echo \$PYTHONIOENCODING
+    VAR="$anndata"
+    n_number="\${VAR%.h5ad}"
+    echo \$n_number
+    scanpy-run-umap \
+        --neighbors-key "neighbors_n_\${n_number}" \
+        --key-added "neighbors_n_\${n_number}" \
+        --export-embedding embeddings.tsv \
+        --n-components 2 \
+        --min-dist 0.5 \
+        --spread 1.0 \
+        --alpha 1.0 \
+        --gamma 1.0 \
+        --negative-sample-rate 5 \
+        --random-state 0 \
+        --init-pos 'spectral' \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        "umap_\${n_number}.h5ad" \
+        && mv "embeddings_neighbors_n_\${n_number}.tsv" umap_n_\${n_number}.tsv
+
+    """
+}
+
+process run_tsne {
+    publishDir "${params.result_dir_path}/tsne", mode: 'copy', pattern: 'tsne_perplexity_*.tsv'
+
+    errorStrategy 'ignore'
+
+    container params.scanpy_scripts_container
+
+    input:
+    tuple path(anndata), val(perplexity_values)
+    val representation
+
+    output:
+    path "tsne_${perplexity_values}.h5ad"
+    path "tsne_perplexity_${perplexity_values}.tsv"
+
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    scanpy-run-tsne \
+        --use-rep $representation \
+        --export-embedding embeddings.tsv \
+        --perplexity $perplexity_values \
+        --key-added 'perplexity_$perplexity_values' \
+        --early-exaggeration '12.0' \
+        --learning-rate '400.0' \
+        --no-fast-tsne \
+        --random-state 1234 \
+        --input-format 'anndata' \
+        $anndata \
+        --show-obj stdout \
+        --output-format anndata \
+        'tsne_${perplexity_values}.h5ad' \
+        && mv 'embeddings_perplexity_${perplexity_values}.tsv' 'tsne_perplexity_${perplexity_values}.tsv'
+    """
+}
+
+process make_project_file {
+    publishDir params.result_dir_path, mode: 'copy'
+
+    container params.scanpy_scripts_container
+
+    input:
+    path neighbors
+    path scanpy_read_10x
+    path filter_genes
+    path normalise_data
+    path find_markers
+    path TSNEs_mix_UMAPs
+    output:
+    path "project.h5ad"
+    script:
+    """
+    export PYTHONIOENCODING='utf-8'
+    ln -s $neighbors input.h5
+    ln -s $scanpy_read_10x r_source.h5
+    ln -s '$filter_genes' x_source_0.h5
+    ln -s '$normalise_data' x_source_1.h5
+    count=0
+    for i in $find_markers
+    do
+        ln -sf "\${i}" obs_source_\${count}.h5
+        ln -sf "\${i}" uns_source_\${count}.h5
+        count=\$((count + 1))
+        echo "\${count}"
+    done
+    count=0
+    for i in $TSNEs_mix_UMAPs
+    do
+        ln -sf "\${i}" embedding_source_\${count}.h5
+        count=\$((count + 1))
+        echo "\${count}"
+    done
+    python ${projectDir}/scripts/final_project.py
+    mv output.h5 project.h5ad
+    """
+}
+
+workflow {
+
+    // Input channels: the per-experiment files staged under params.dir_path
+    genemeta = Channel.fromPath("${params.dir_path}/genes_metadata.tsv")
+    genes = Channel.fromPath("${params.dir_path}/genes.tsv")
+    barcodes = Channel.fromPath("${params.dir_path}/barcodes.tsv")
+    matrix = Channel.fromPath("${params.dir_path}/matrix.mtx")
+    cellmeta = Channel.fromPath("${params.dir_path}/cell_metadata.tsv")
+    neighbors_ch = channel.fromList(params.neighbor_values)
+    perplexity_ch = channel.fromList(params.perplexity_values)
+    resolution_ch = channel.fromList(params.resolution_values)
+    merged_group_slotname_ch = Channel.fromList(params.merged_group_slotname)
+
+    Column_rearrange_1(
+        genemeta,
+        "gene_id"
+    )
+    Column_rearrange_2(
+        genemeta,
+        "gene_id",
+        "gene_name"
+    )
+    mergeGeneFiles(
+        genes,
+        Column_rearrange_2.out
+    )
+    scanpy_read_10x(
+        matrix,
+        mergeGeneFiles.out,
+        barcodes,
+        cellmeta,
+        genemeta
+    )
+
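+    // Droplet data may contain doublets, so it first goes through scrublet
+    // detection, a scrublet histogram plot, and a doublet-aware cell filter;
+    // plate data goes straight to cell filtering.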
+    if ( params.technology == "droplet" ) {
+        SCRUBLET_ch = scanpy_multiplet_scrublet(
+            scanpy_read_10x.out,
+            params.batch_variable
+        )
+        scanpy_plot_scrublet(
+            SCRUBLET_ch
+        )
+        scanpy_filter_cells(
+            SCRUBLET_ch,
+            "--category predicted_doublet False"
+        )
+    }
+    else {
+        scanpy_filter_cells(
+            scanpy_read_10x.out,
+            ""
+        )
+    }
+
+    scanpy_filter_genes(
+        scanpy_filter_cells.out,
+        Column_rearrange_1.out[0]
+    )
+    normalise_data(
+        scanpy_filter_genes.out[0]
+    )
+    normalise_internal_data(
+        scanpy_filter_genes.out[0]
+    )
+    find_variable_genes(
+        normalise_internal_data.out,
+        params.batch_variable
+    )
+
+    if ( params.technology == "droplet" ) {
+        scale_data(
+            find_variable_genes.out
+        )
+        run_pca(
+            scale_data.out
+        )
+    }
+    else {
+        run_pca(
+            find_variable_genes.out
+        )
+    }
+
+    harmony_batch(
+        run_pca.out,
+        params.batch_variable
+    )
+    neighbors(
+        harmony_batch.out,
+        params.representation
+    )
+    neighbors_for_umap(
+        harmony_batch.out.combine(neighbors_ch),
+        params.representation
+    )
+    TSNEs_ch = run_tsne(
+        harmony_batch.out.combine(perplexity_ch),
+        params.representation
+    )[0]
+    //TSNEs_ch
+    //    .filter { it.exitStatus == 0 }
+
+    UMAPs_ch = run_umap(
+        neighbors_for_umap.out.flatten()
+    )[0]
+    //UMAPs_ch
+    //    .filter { it.exitStatus == 0 }
+    find_clusters(
+        neighbors.out.combine(resolution_ch)
+    )
+
+    // Combine the outputs of the find_clusters and neighbors processes
+    combined_outputs = find_clusters.out[0].mix(neighbors.out)
+
+    if ( params.technology == "droplet" ) {
+        restore_unscaled(
+            combined_outputs.combine(normalise_internal_data.out)
+        )
+        restore_unscaled_files = restore_unscaled.out.map { file ->
+            // Derive the slot name (resolution or celltype field) from the file name
+            def sampleNumber = file.baseName.replaceFirst('restore_unscaled_output_', '').replaceFirst('clusters', params.slotname).replaceFirst('neighbors', params.celltype_field).replaceFirst('.h5ad', '')
+            [file, sampleNumber] // Create a tuple of file and slot name
+        }
+        find_markers(
+            restore_unscaled_files
+        )
+    }
+    else {
+        processed_files = combined_outputs.map { file ->
+            // Derive the slot name (resolution or celltype field) from the file name
+            def sampleNumber = file.baseName.replaceFirst('clusters', params.slotname).replaceFirst('neighbors', params.celltype_field)
+            [file, sampleNumber] // Create a tuple of file and slot name
+        }
+        find_markers(
+            processed_files
+        )
+    }
+    make_project_file(
+        neighbors.out,
+        scanpy_read_10x.out,
+        scanpy_filter_genes.out[0],
+        normalise_data.out[0],
+        find_markers.out[0].collect(),
+        TSNEs_ch.mix(UMAPs_ch).collect()
+    )
+}
diff --git a/nextflow.config b/nextflow.config
index 8b137891..9f892942 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1 +1,54 @@
+process {
+    executor='slurm'
+    queue="$SCXA_HPC_QUEUE"
+    // clusterOptions="$SCXA_HPC_OPTIONS"
+    time = '7 d'
+    memory = '4 GB'
+    // error strategy
+    // errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
+    // memory = { 4.GB * (2 ** task.attempt) }
+    // maxRetries = 4
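+    // The commented settings above sketch a retry strategy: tasks killed with
+    // exit status 137-140 are resubmitted with memory doubled on each attempt.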
+}
+
+executor {
+    queueSize = 500
+    exitReadTimeout = '100000 sec'
+    pollInterval = '5sec'
+}
+
+singularity {
+    enabled = true
+    cacheDir = "$SCXA_SINGULARITY_CACHE"
+}
+
+conda {
+    // cacheDir = "$SCXA_WORKFLOW_ROOT/envs"
+    createTimeout = "30 min"
+    useMamba = true
+}
+
+params {
+    dir_path = "."
+    output_path = null
+    result_dir_path = "${params.output_path ?: params.dir_path + '/results'}"
+}
+
+trace {
+    enabled = true
+    file = "${params.result_dir_path}/trace.txt"
+    overwrite = true
+}
+
+timeline {
+    enabled = true
+    file = "${params.result_dir_path}/timeline.html"
+    overwrite = true
+}
+
+report {
+    enabled = true
+    file = "${params.result_dir_path}/report.html"
+    overwrite = true
+}
diff --git a/scripts/data_prep.sh b/scripts/data_prep.sh
new file mode 100755
index 00000000..eb9c2433
--- /dev/null
+++ b/scripts/data_prep.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# This is an EMBL-EBI-specific script that fetches data from the workflow root and stages it for the downstream workflow
+
+if [ -z "$SCXA_WORKFLOW_ROOT" ]; then
+  echo "Variable SCXA_WORKFLOW_ROOT is not defined or empty. Please load SC env."
+  echo "Exiting..."
+  exit 1;
+fi
+
+if [ -z "$1" ]; then
+  echo "Experiment ID is not provided. Please provide an experiment ID."
+  echo "Usage: bash data_prep.sh <EXP_ID> [output path]"
+  echo "Exiting..."
+  exit 1;
+fi
+
+EXP_ID=$1
+
+outdir="$(pwd)"
+
+if [ -n "$2" ]; then
+  outdir="$2"
+fi
+
+
+echo "Creating ${outdir}/${EXP_ID} directory"
+mkdir -p "${outdir}/${EXP_ID}"
+cd "${outdir}/${EXP_ID}" || exit 1
+
+echo "Copying data to ${outdir}/${EXP_ID}"
+
+cp ${SCXA_WORKFLOW_ROOT}/results/${EXP_ID}/*/bundle/${EXP_ID}.cell_metadata.tsv cell_metadata.tsv
+cp ${SCXA_WORKFLOW_ROOT}/results/${EXP_ID}/*/bundle/filtered_normalised/genes.tsv.gz . && gunzip -f genes.tsv.gz
+cp ${SCXA_WORKFLOW_ROOT}/results/${EXP_ID}/*/bundle/filtered_normalised/matrix.mtx.gz . && gunzip -f matrix.mtx.gz
+cp ${SCXA_WORKFLOW_ROOT}/results/${EXP_ID}/*/bundle/filtered_normalised/barcodes.tsv.gz . && gunzip -f barcodes.tsv.gz
+cp ${SCXA_WORKFLOW_ROOT}/results/${EXP_ID}/*/bundle/reference/gene_annotation.txt genes_metadata.tsv
+
+echo "Copying data for ${EXP_ID} finished"
diff --git a/scripts/final_project.py b/scripts/final_project.py
new file mode 100755
index 00000000..9616ded0
--- /dev/null
+++ b/scripts/final_project.py
@@ -0,0 +1,119 @@
+
+import scanpy as sc
+import anndata
+from numpy import all
+import logging
+import os
+import sys
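+# Assemble the final project object: attach the raw object as .raw, add the
+# filtered and normalised matrices as layers, then merge cluster assignments
+# (obs_source_*), embeddings (embedding_source_*) and marker tables
+# (uns_source_*) linked in by the make_project_file process.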
+
+adata = sc.read('input.h5')
+
+gene_name = 'index'
+qc_vars = list()
+
+gene_names = getattr(adata.var, gene_name)
+# Define the directory containing your source files
+source_dir = '.'  # Adjust to the appropriate path
+
+ad_s = sc.read('r_source.h5')
+if not all(adata.obs.index.isin(ad_s.obs.index)):
+    logging.error("Specified object for .raw must contain all .obs from main object.")
+    sys.exit(1)
+else:
+    adata.raw = ad_s[adata.obs.index]
+del ad_s
+
+ad_s = sc.read('x_source_0.h5')
+if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
+    adata.layers["filtered"] = ad_s.X
+else:
+    logging.error("X source 0 AnnData file is not compatible with the main AnnData file: different cell names.")
+    sys.exit(1)
+del ad_s
+ad_s = sc.read('x_source_1.h5')
+if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
+    adata.layers["normalised"] = ad_s.X
+else:
+    logging.error("X source 1 AnnData file is not compatible with the main AnnData file: different cell names.")
+    sys.exit(1)
+del ad_s
+
+obs_sources = [file for file in os.listdir(source_dir) if file.startswith('obs_source_') and file.endswith('.h5')]
+
+for idx, obs_source_file in enumerate(sorted(obs_sources)):
+    ad_s = sc.read(os.path.join(source_dir, obs_source_file))
+    if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
+        keys_to_copy = (k for k in ad_s.obs.keys() if "louvain" in k)
+        for k_to_copy in keys_to_copy:
+            suffix = ''
+            if k_to_copy in adata.obs:
+                suffix = f"_{idx}"
+
+            adata.obs[[k_to_copy + suffix]] = ad_s.obs[[k_to_copy]]
+            if k_to_copy in ad_s.uns.keys():
+                adata.uns[k_to_copy + suffix] = ad_s.uns[k_to_copy]
+    else:
+        logging.error(f"Observation source {idx} AnnData file is not compatible with the main AnnData file: different cell names.")
+        sys.exit(1)
+    del ad_s
+
+
+embedding_sources = [file for file in os.listdir(source_dir) if file.startswith('embedding_source_') and file.endswith('.h5')]
+
+for idx, embedding_file in enumerate(sorted(embedding_sources)):
+    ad_s = sc.read(os.path.join(source_dir, embedding_file))
+    if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
+        # Copy tsne embeddings
+        keys_to_copy = (k for k in ad_s.obsm.keys() if "tsne" in k)
+        for k_to_copy in keys_to_copy:
+            suffix = ''
+            if k_to_copy in adata.obsm:
+                suffix = f"_{idx}"
+            adata.obsm[k_to_copy + suffix] = ad_s.obsm[k_to_copy]
+        # Copy umap embeddings
+        keys_to_copy = (k for k in ad_s.obsm.keys() if "umap" in k)
+        for k_to_copy in keys_to_copy:
+            suffix = ''
+            if k_to_copy in adata.obsm:
+                suffix = f"_{idx}"
+            adata.obsm[k_to_copy + suffix] = ad_s.obsm[k_to_copy]
+    else:
+        logging.error(f"Embedding source {idx} AnnData file is not compatible with the main AnnData file: different cell names.")
+        sys.exit(1)
+    del ad_s
+
+
+uns_sources = [file for file in os.listdir(source_dir) if file.startswith('uns_source_') and file.endswith('.h5')]
+for idx, uns_file in enumerate(sorted(uns_sources)):
+    ad_s = sc.read(os.path.join(source_dir, uns_file))
+    if adata.n_obs == ad_s.n_obs and all(adata.obs_names == ad_s.obs_names):
+        keys_to_copy = (k for k in ad_s.uns.keys() if "marker" in k)
+        for k_to_copy in keys_to_copy:
+            suffix = ''
+            if k_to_copy in adata.uns:
+                suffix = f"_{idx}"
+            adata.uns[k_to_copy + suffix] = ad_s.uns[k_to_copy]
+    else:
+        logging.error(f"Uns source {idx} AnnData file is not compatible with the main AnnData file: different cell names.")
+        sys.exit(1)
+    del ad_s
+
+
+if len(qc_vars) > 0:
+    pct_top = [50]
+    sc.pp.calculate_qc_metrics(adata, qc_vars=qc_vars, percent_top=pct_top, inplace=True)
+
+if 'n_genes' not in adata.obs.columns:
+    sc.pp.filter_cells(adata, min_genes=0)
+if 'n_counts' not in adata.obs.columns:
+    sc.pp.filter_cells(adata, min_counts=0)
+if 'n_cells' not in adata.var.columns:
+    sc.pp.filter_genes(adata, min_cells=0)
+if 'n_counts' not in adata.var.columns:
+    sc.pp.filter_genes(adata, min_counts=0)
+
+adata.write('output.h5', compression='gzip')
diff --git a/scripts/restore_unscaled.py b/scripts/restore_unscaled.py
new file mode 100755
index 00000000..9e5eef92
--- /dev/null
+++ b/scripts/restore_unscaled.py
@@ -0,0 +1,36 @@
+import scanpy as sc
+import anndata
+from numpy import all
+import logging
+import sys
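+# Re-attach the unscaled (log-normalised) matrix as .raw on a clustered object
+# so that downstream marker detection works on unscaled values.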
+
+adata = sc.read('input.h5')
+
+gene_name = 'index'
+qc_vars = list()
+gene_names = getattr(adata.var, gene_name)
+
+ad_s = sc.read('r_source.h5')
+if not all(adata.obs.index.isin(ad_s.obs.index)):
+    logging.error("Specified object for .raw must contain all .obs from main object.")
+    sys.exit(1)
+else:
+    adata.raw = ad_s[adata.obs.index]
+del ad_s
+
+if len(qc_vars) > 0:
+    pct_top = [50]
+    sc.pp.calculate_qc_metrics(adata, qc_vars=qc_vars, percent_top=pct_top, inplace=True)
+
+if 'n_genes' not in adata.obs.columns:
+    sc.pp.filter_cells(adata, min_genes=0)
+if 'n_counts' not in adata.obs.columns:
+    sc.pp.filter_cells(adata, min_counts=0)
+if 'n_cells' not in adata.var.columns:
+    sc.pp.filter_genes(adata, min_cells=0)
+if 'n_counts' not in adata.var.columns:
+    sc.pp.filter_genes(adata, min_counts=0)
+
+adata.write('output.h5', compression='gzip')