diff --git a/.github/workflows/ecoli.yml b/.github/workflows/ecoli.yml
index b610388..f66c2f0 100644
--- a/.github/workflows/ecoli.yml
+++ b/.github/workflows/ecoli.yml
@@ -38,5 +38,12 @@ jobs:
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
           cat grandeur/grandeur_summary.tsv
-          cat grandeur/shigatyper/shigatyper_results.txt
-          cat grandeur/serotypefinder/serotypefinder_results.txt
\ No newline at end of file
+
+      - name: Check E. coli file
+        run: |
+          for file in grandeur/shigatyper/shigatyper_results.txt grandeur/serotypefinder/serotypefinder_results.txt
+          do
+            head $file
+            wc -l $file
+          done
+
\ No newline at end of file
diff --git a/.github/workflows/just_msa.yml b/.github/workflows/just_msa.yml
index c0cb407..6910866 100644
--- a/.github/workflows/just_msa.yml
+++ b/.github/workflows/just_msa.yml
@@ -24,13 +24,21 @@ jobs:
         run: |
           docker --version
-          wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz
-          wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz
-          wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz
-          wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/808/985/GCA_020808985.1_ASM2080898v1/GCA_020808985.1_ASM2080898v1_genomic.fna.gz && gzip -d GCA_020808985.1_ASM2080898v1_genomic.fna.gz
-          wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/863/225/GCA_904863225.1_KSB1_6J/GCA_904863225.1_KSB1_6J_genomic.fna.gz && gzip -d GCA_904863225.1_KSB1_6J_genomic.fna.gz
+          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/864/595/GCA_904864595.1_INF333/GCA_904864595.1_INF333_genomic.fna.gz && gzip -d GCA_904864595.1_INF333_genomic.fna.gz
+          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/013/783/245/GCA_013783245.1_ASM1378324v1/GCA_013783245.1_ASM1378324v1_genomic.fna.gz && gzip -d GCA_013783245.1_ASM1378324v1_genomic.fna.gz
+          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/026/626/185/GCA_026626185.1_ASM2662618v1/GCA_026626185.1_ASM2662618v1_genomic.fna.gz && gzip -d GCA_026626185.1_ASM2662618v1_genomic.fna.gz
+          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/020/808/985/GCA_020808985.1_ASM2080898v1/GCA_020808985.1_ASM2080898v1_genomic.fna.gz && gzip -d GCA_020808985.1_ASM2080898v1_genomic.fna.gz
+          wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/904/863/225/GCA_904863225.1_KSB1_6J/GCA_904863225.1_KSB1_6J_genomic.fna.gz && gzip -d GCA_904863225.1_KSB1_6J_genomic.fna.gz
           mkdir fastas
           mv *fna fastas/.
           nextflow run . -profile docker,just_msa --maxcpus 2 --medcpus 2
+
+      - name: Check MSA files
+        run: |
+          for file in grandeur/roary/summary_statistics.txt grandeur/iqtree2/iqtree.treefile.nwk grandeur/snp-dists/snp_matrix_with_qc.txt
+          do
+            head $file
+            wc -l $file
+          done
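Note: the new "Check …" steps only preview each expected output with `head` and `wc -l`, so a missing file surfaces as a shell error rather than an explicit assertion. A minimal sketch of a stricter gate, written as a hypothetical `check_outputs.py` helper that is not part of this PR, which exits non-zero when an expected file is missing or empty:

```python
#!/usr/bin/env python3
# check_outputs.py (hypothetical helper, not in this PR):
# fail the CI job when an expected pipeline output is missing or empty.
import itertools
import sys
from pathlib import Path

def check(paths):
    ok = True
    for path in map(Path, paths):
        if not path.is_file() or path.stat().st_size == 0:
            print(f"MISSING OR EMPTY: {path}")
            ok = False
            continue
        with path.open() as fh:
            # mimic `head`: show the first ten lines
            for line in itertools.islice(fh, 10):
                print(line.rstrip("\n"))
        with path.open() as fh:
            # mimic `wc -l`: report the line count
            print(f"{sum(1 for _ in fh)} {path}")
    return ok

if __name__ == "__main__":
    sys.exit(0 if check(sys.argv[1:]) else 1)
```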
diff --git a/.github/workflows/legionella.yml b/.github/workflows/legionella.yml
index 3a27757..5d28836 100644
--- a/.github/workflows/legionella.yml
+++ b/.github/workflows/legionella.yml
@@ -40,4 +40,11 @@ jobs:
           cat grandeur/grandeur_summary.tsv
-          cat grandeur/legsta/legsta_summary.csv
\ No newline at end of file
+
+      - name: Check Legionella file
+        run: |
+          for file in grandeur/legsta/legsta_summary.csv
+          do
+            head $file
+            wc -l $file
+          done
+
\ No newline at end of file
diff --git a/.github/workflows/run_workflow.yml b/.github/workflows/run_workflow.yml
index ea607bb..6eeea18 100644
--- a/.github/workflows/run_workflow.yml
+++ b/.github/workflows/run_workflow.yml
@@ -31,4 +31,11 @@ jobs:
           mv *fastq.gz reads/.
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
           cat grandeur/grandeur_summary.tsv
-
+
+      - name: Check summary files
+        run: |
+          for file in grandeur/mlst/mlst_summary.tsv
+          do
+            head $file
+            wc -l $file
+          done
diff --git a/.github/workflows/salmonella.yml b/.github/workflows/salmonella.yml
index 2b93058..73087ed 100644
--- a/.github/workflows/salmonella.yml
+++ b/.github/workflows/salmonella.yml
@@ -37,5 +37,14 @@ jobs:
           done
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
+
           cat grandeur/grandeur_summary.tsv
-          cat grandeur/seqsero2/seqsero2_results.txt
\ No newline at end of file
+
+      - name: Check Salmonella file
+        run: |
+          for file in grandeur/seqsero2/seqsero2_results.txt
+          do
+            head $file
+            wc -l $file
+          done
+
\ No newline at end of file
diff --git a/.github/workflows/strepA.yml b/.github/workflows/strepA.yml
index 3ed8a18..5a4e1ed 100644
--- a/.github/workflows/strepA.yml
+++ b/.github/workflows/strepA.yml
@@ -38,4 +38,11 @@ jobs:
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
           cat grandeur/grandeur_summary.tsv
-          cat grandeur/emmtyper/emmtyper_summary.tsv
+
+      - name: Check Strep A file
+        run: |
+          for file in grandeur/emmtyper/emmtyper_summary.tsv
+          do
+            head $file
+            wc -l $file
+          done
diff --git a/.github/workflows/strep_pneumo.yml b/.github/workflows/strep_pneumo.yml
index da2f3bf..4d4ff70 100644
--- a/.github/workflows/strep_pneumo.yml
+++ b/.github/workflows/strep_pneumo.yml
@@ -38,4 +38,12 @@ jobs:
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
           cat grandeur/grandeur_summary.tsv
-          cat grandeur/pbptyper/pbptyper_summary.tsv
\ No newline at end of file
+
+      - name: Check Strep pneumo file
+        run: |
+          for file in grandeur/pbptyper/pbptyper_summary.tsv
+          do
+            head $file
+            wc -l $file
+          done
+
\ No newline at end of file
diff --git a/.github/workflows/vibrio.yml b/.github/workflows/vibrio.yml
index 7194679..a160a90 100644
--- a/.github/workflows/vibrio.yml
+++ b/.github/workflows/vibrio.yml
@@ -38,4 +38,6 @@ jobs:
           nextflow run . -profile docker --maxcpus 2 --medcpus 2
           cat grandeur/grandeur_summary.tsv
-          grep -i vibrio grandeur/fastani/fastani_summary.csv
\ No newline at end of file
+
+      - name: Check Vibrio species
+        run: grep -i vibrio grandeur/fastani/fastani_summary.csv
diff --git a/bin/datasets_download.py b/bin/datasets_download.py
new file mode 100644
index 0000000..48dd79b
--- /dev/null
+++ b/bin/datasets_download.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+'''
+Author: Erin Young
+
+Description:
+
+This script gets genome accessions for a taxon from NCBI datasets.
+
+EXAMPLE:
+python3 datasets_download.py taxon hits
+'''
+
+import os
+import subprocess
+import sys
+
+taxon = sys.argv[1]
+genus, species = taxon.replace('[', '').replace(']', '').split('_')
+print("Looking for accessions for " + genus + " " + species)
+
+# the Nextflow process normally creates this directory; created here so the EXAMPLE above also works standalone
+os.makedirs('datasets', exist_ok=True)
+outfile = open('datasets/' + genus + '_' + species + '_genomes.csv', 'w')
+
+try:
+    hits = sys.argv[2]
+except IndexError:
+    hits = '5'
+
+# putting in the header
+outfile.write('accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len\n')
+
+# Getting representative genomes
+# (list-form subprocess args are passed verbatim, so the taxon must not be wrapped in literal quotes)
+rep = subprocess.Popen(['datasets', 'summary', 'genome', 'taxon', genus + ' ' + species, '--reference', '--limit', hits, '--as-json-lines'], stdout=subprocess.PIPE)
+dft = subprocess.check_output(['dataformat', 'tsv', 'genome', '--fields', 'accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len'], stdin=rep.stdout, text=True)
+rep.wait()
+for line in dft.split('\n'):
+    # skip the repeated header line and anything over 15 Mb (too large for a bacterial genome)
+    if 'Ungapped Length' not in line and line:
+        if int(line.split('\t')[4]) < 15000000:
+            outfile.write(line.replace('\t', ',') + '\n')
+
+# Getting additional genomes
+oth = subprocess.Popen(['datasets', 'summary', 'genome', 'taxon', genus + ' ' + species, '--limit', hits, '--as-json-lines'], stdout=subprocess.PIPE)
+df2 = subprocess.check_output(['dataformat', 'tsv', 'genome', '--fields', 'accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len'], stdin=oth.stdout, text=True)
+oth.wait()
+for line in df2.split('\n'):
+    if 'Ungapped Length' not in line and line:
+        if int(line.split('\t')[4]) < 15000000:
+            outfile.write(line.replace('\t', ',') + '\n')
+
+outfile.close()
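Note: the script above chains `datasets summary` into `dataformat tsv` with a `Popen`/`check_output` pipe. The same two-stage pipeline can be written without managing `Popen` objects by capturing stdout and feeding it to the second command as input. A sketch, not what the PR ships; the taxon and limit are illustrative, and the NCBI `datasets`/`dataformat` CLIs are assumed to be on PATH:

```python
# Sketch: the datasets -> dataformat pipe via subprocess.run with captured output.
import subprocess

fields = 'accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len'
summary = subprocess.run(
    ['datasets', 'summary', 'genome', 'taxon', 'Listeria monocytogenes',
     '--reference', '--limit', '5', '--as-json-lines'],
    capture_output=True, text=True, check=True)
table = subprocess.run(
    ['dataformat', 'tsv', 'genome', '--fields', fields],
    input=summary.stdout, capture_output=True, text=True, check=True)
print(table.stdout)
```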
Exiting.") + exit() + +coms = 0 +tabs = 0 +with open(sys.argv[1]) as file: + first_line = file.readline() + coms = first_line.count('\t') + tabs = first_line.count('\t') + +if tabs > coms: + delim = '\t' + print("Predicting tab delimited") +else: + delim = ',' + print("Predicting comma delimited") + +with open(sys.argv[1], 'r') as file: + lines = file.readlines() + for line in lines: + print(line) + print(line.split(delim)) + +outfile = open(sys.argv[2], "w") + +final_delim = ',' +header = 'shouldntexist' + +# TODO: turn this into a dict +if sys.argv[3] == 'mlst': + final_delim = '\t' + header = 'PubMLST' + outfile.write('sample\tfilename\tmatching PubMLST scheme\tST\tID1\tID2\tID3\tID4\tID5\tID6\tID7\tID8\tID9\tID10\tID11\tID12\tID13\tID14\tID15\n') +elif sys.argv[3] == 'shigatyper': + final_delim = '\t' + header = 'Number' +elif sys.argv[3] == 'kleborate': + final_delim = '\t' + header = 'largest_contig' +elif sys.argv[3] == 'plasmidfinder' : + final_delim = '\t' + header = 'Accession number' +elif sys.argv[3] == 'emmtyper': + outfile.write('sample\tIsolate name\tNumber of BLAST hits\tNumber of clusters\tPredicted emm-type\tPosition(s)\tPossible emm-like alleles\temm-like position(s)\tEMM cluster\n') + final_delim = '\t' + header = 'Number of BLAST hits' +elif sys.argv[3] == 'serotypefinder': + final_delim = '\t' + header = 'HSP length' + +print("Using final delim " + final_delim + " with sample " + spl + " for " + sys.argv[3]) + +with open(sys.argv[1]) as file: + lines = file.readlines() + for line in lines: + if header in line: + replace = line.replace(delim, final_delim) + outfile.write('sample' + final_delim + replace) + else: + replace = line.replace(delim, final_delim) + outfile.write(spl + final_delim + replace) diff --git a/grandeur.nf b/grandeur.nf index 516371d..5bd9b5a 100644 --- a/grandeur.nf +++ b/grandeur.nf @@ -146,8 +146,10 @@ include { test } from "./subworkflows/test" // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### // Creating the summary files -summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file") -snpmtrx_script = Channel.fromPath(workflow.projectDir + "/bin/HeatCluster.py", type: "file") +dataset_script = Channel.fromPath(workflow.projectDir + "/bin/datasets_download.py", type: "file") +snpmtrx_script = Channel.fromPath(workflow.projectDir + "/bin/HeatCluster.py", type: "file") +summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file") +summfle_script = Channel.fromPath(workflow.projectDir + "/bin/summary_file.py", type: "file") // ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### @@ -318,7 +320,8 @@ workflow { ch_for_summary.collect(), ch_contigs, ch_fastani_genomes, - ch_genome_ref) + ch_genome_ref, + dataset_script) ch_for_summary = ch_for_summary.mix(average_nucleotide_identity.out.for_summary) ch_for_flag = ch_for_flag.mix(average_nucleotide_identity.out.for_flag) @@ -341,7 +344,8 @@ workflow { ch_raw_reads, ch_contigs, ch_for_flag, - ch_size) + ch_size, + summfle_script) ch_for_summary = ch_for_summary.mix(information.out.for_summary) ch_for_multiqc = ch_for_multiqc.mix(information.out.for_multiqc) diff --git a/modules/blobtools.nf b/modules/blobtools.nf index c3c836f..5b1d5fd 100644 --- a/modules/blobtools.nf +++ b/modules/blobtools.nf @@ -125,4 +125,4 @@ process blobtools_plot { grep -vw all blobtools/!{sample}_summary.txt > blobtools/!{sample}_blobtools.txt ''' -} +} \ No newline at end of file diff --git a/modules/datasets.nf b/modules/datasets.nf index 
diff --git a/grandeur.nf b/grandeur.nf
index 516371d..5bd9b5a 100644
--- a/grandeur.nf
+++ b/grandeur.nf
@@ -146,8 +146,10 @@ include { test } from "./subworkflows/test"
 // ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
 
 // Creating the summary files
-summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file")
-snpmtrx_script = Channel.fromPath(workflow.projectDir + "/bin/HeatCluster.py", type: "file")
+dataset_script = Channel.fromPath(workflow.projectDir + "/bin/datasets_download.py", type: "file")
+snpmtrx_script = Channel.fromPath(workflow.projectDir + "/bin/HeatCluster.py", type: "file")
+summary_script = Channel.fromPath(workflow.projectDir + "/bin/summary.py", type: "file")
+summfle_script = Channel.fromPath(workflow.projectDir + "/bin/summary_file.py", type: "file")
 
 // ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
@@ -318,7 +320,8 @@ workflow {
                                       ch_for_summary.collect(),
                                       ch_contigs,
                                       ch_fastani_genomes,
-                                      ch_genome_ref)
+                                      ch_genome_ref,
+                                      dataset_script)
 
     ch_for_summary = ch_for_summary.mix(average_nucleotide_identity.out.for_summary)
     ch_for_flag    = ch_for_flag.mix(average_nucleotide_identity.out.for_flag)
@@ -341,7 +344,8 @@ workflow {
                 ch_raw_reads,
                 ch_contigs,
                 ch_for_flag,
-                ch_size)
+                ch_size,
+                summfle_script)
 
     ch_for_summary = ch_for_summary.mix(information.out.for_summary)
     ch_for_multiqc = ch_for_multiqc.mix(information.out.for_multiqc)
diff --git a/modules/blobtools.nf b/modules/blobtools.nf
index c3c836f..5b1d5fd 100644
--- a/modules/blobtools.nf
+++ b/modules/blobtools.nf
@@ -125,4 +125,4 @@ process blobtools_plot {
     grep -vw all blobtools/!{sample}_summary.txt > blobtools/!{sample}_blobtools.txt
   '''
-}
+}
\ No newline at end of file
diff --git a/modules/datasets.nf b/modules/datasets.nf
index b80a287..43de113 100644
--- a/modules/datasets.nf
+++ b/modules/datasets.nf
@@ -1,7 +1,7 @@
 process datasets_summary {
   tag "${taxon}"
   publishDir params.outdir, mode: 'copy'
-  container 'staphb/ncbi-datasets:15.2.0'
+  container 'quay.io/uphl/datasets:15.12.0'
   maxForks 10
   //#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
   //#UPHLICA pod annotation: 'scheduler.illumina.com/presetSize', value: 'standard-medium'
@@ -10,10 +10,10 @@ process datasets_summary {
   //#UPHLICA time '10m'
 
   input:
-  val(taxon)
+  tuple val(taxon), file(script)
 
   output:
-  path "datasets/*_genomes.csv"                          , emit: genomes
+  path "datasets/*_genomes.csv"                          , emit: genomes, optional: true
   path "logs/${task.process}/*.${workflow.sessionId}.log", emit: log
 
   shell:
@@ -28,32 +28,18 @@ process datasets_summary {
    echo "Nextflow command : " >> $log_file
    cat .command.sh >> $log_file
 
-   taxon="$(echo !{taxon} | tr '_' ' ' | sed 's/[//g' | sed 's/]//g' )"
-   echo "the taxon is now $taxon"
-
-   datasets summary genome taxon "$taxon" --reference --limit !{params.datasets_max_genomes} --as-json-lines | \
-     dataformat tsv genome --fields accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len | \
-     grep -v Homo | \
-     tr '\\t' ',' \
-     > datasets/!{taxon}_genomes.csv
-
-   datasets summary genome taxon "$taxon" --limit !{params.datasets_max_genomes} --as-json-lines | \
-     dataformat tsv genome --fields accession,assminfo-refseq-category,assminfo-level,organism-name,assmstats-total-ungapped-len | \
-     grep -v Homo | \
-     grep -v "Assembly Accession" | \
-     tr '\\t' ',' \
-     >> datasets/!{taxon}_genomes.csv
+   python3 !{script} !{taxon} !{params.datasets_max_genomes}
   '''
}
 
-// It is faster if datasets can download the entire list at a time, but there is a timeout for downloading that is about 20 minutes.
+// It is faster if datasets can download the entire list at a time, but there is a 20 minute timeout for downloading.
 // The '||' is to allow each genome to be downloaded on its own, which is longer overall but each genome should be less than 20 minutes.
 process datasets_download {
   tag "Downloading Genomes"
   // because there's no way to specify threads
   label "medcpus"
   publishDir = [ path: "${params.outdir}", mode: 'copy', pattern: "{logs/*/*log,datasets/fastani_refs.tar.gz}" ]
-  container 'staphb/ncbi-datasets:15.2.0'
+  container 'quay.io/uphl/datasets:15.12.0'
   maxForks 10
   //#UPHLICA errorStrategy { task.attempt < 2 ? 'retry' : 'ignore'}
   //#UPHLICA pod annotation: 'scheduler.illumina.com/presetSize', value: 'standard-medium'
@@ -83,7 +69,7 @@ process datasets_download {
    cat .command.sh >> $log_file
 
    cut -f 1 !{genomes} > all_runs.txt
-   grep -h -v Accession !{ids} | cut -f 1 -d , | sort | uniq > this_run.txt
+   grep -h -v accession !{ids} | cut -f 1 -d , | sort | uniq > this_run.txt
 
    cat all_runs.txt this_run.txt | sort | uniq > id_list.txt
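Note: the comment above `datasets_download` describes the download strategy, batch first for speed, then one genome at a time so each transfer stays under the ~20 minute timeout (the module implements this in bash with `||`). The same shape in Python, with a hypothetical `download()` stand-in for `datasets download genome accession ...`:

```python
# Sketch of the batch-first / per-item-fallback pattern described above.
import subprocess

def download(accessions):
    # hypothetical stand-in for the module's datasets CLI call
    subprocess.run(['datasets', 'download', 'genome', 'accession', *accessions],
                   check=True, timeout=20 * 60)

def download_with_fallback(accessions):
    try:
        download(accessions)           # fastest: one batched request
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
        for acc in accessions:         # slower overall, but each item stays under the timeout
            download([acc])
```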
diff --git a/modules/emmtyper.nf b/modules/emmtyper.nf
index 3bc7f22..b1aa1ec 100644
--- a/modules/emmtyper.nf
+++ b/modules/emmtyper.nf
@@ -10,11 +10,11 @@ process emmtyper {
   //#UPHLICA cpus 3
   //#UPHLICA time '24h'
 
-  when:
-  flag =~ 'found'
+  when:
+  flag =~ 'found'
 
   input:
-  tuple val(sample), file(contigs), val(flag)
+  tuple val(sample), file(contigs), val(flag), file(script)
 
   output:
   path "emmtyper/${sample}_emmtyper.txt"                         , emit: collect
@@ -34,12 +34,12 @@ process emmtyper {
    echo "Nextflow command : " >> $log_file
    cat .command.sh >> $log_file
 
-   echo -e "sample\\tIsolate name\\tNumber of BLAST hits\\tNumber of clusters\\tPredicted emm-type\\tPosition(s)\\tPossible emm-like alleles\\temm-like position(s)\\tEMM cluster" > emmtyper/!{sample}_emmtyper.txt
-
    emmtyper !{params.emmtyper_options} \
      --output-format 'verbose' \
      !{contigs} \
      | tee -a $log_file \
-     | awk -v sample=!{sample} '{ print sample "\\t" $0 }' >> emmtyper/!{sample}_emmtyper.txt
+     > !{sample}_emmtyper.txt
+
+   python3 !{script} !{sample}_emmtyper.txt emmtyper/!{sample}_emmtyper.txt emmtyper !{sample}
   '''
}
diff --git a/modules/fastani.nf b/modules/fastani.nf
index 145b415..174306c 100644
--- a/modules/fastani.nf
+++ b/modules/fastani.nf
@@ -16,10 +16,10 @@ process fastani {
   tuple val(sample), file(contigs), path(genomes)
 
   output:
-  tuple val(sample), file("fastani/${sample}_fastani.csv")       , emit: results
-  tuple val(sample), env(top_hit), file("top_hit/*"), optional: true, emit: top_hit
-  path "fastani/*"                                               , emit: everything
-  path "logs/${task.process}/${sample}.${workflow.sessionId}.log" , emit: log
+  tuple val(sample), file("fastani/${sample}_fastani.csv")       , emit: results, optional: true
+  tuple val(sample), env(top_hit), file("top_hit/*")             , emit: top_hit, optional: true
+  path "fastani/*"                                               , emit: everything
+  path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log
 
   shell:
   '''
diff --git a/modules/kleborate.nf b/modules/kleborate.nf
index 301eef4..93c6322 100644
--- a/modules/kleborate.nf
+++ b/modules/kleborate.nf
@@ -13,10 +13,10 @@ process kleborate {
   flag =~ 'found'
 
   input:
-  tuple val(sample), file(contig), val(flag)
+  tuple val(sample), file(contig), val(flag), file(script)
 
   output:
-  path "kleborate/${sample}_results.tsv"                         , emit: collect
+  path "kleborate/${sample}_results.tsv"                         , emit: collect, optional: true
   path "kleborate/${sample}_results.txt"                         , emit: result
   path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log
 
@@ -37,7 +37,6 @@ process kleborate {
      -a !{contig} \
      | tee -a $log_file
 
-   head -n 1 kleborate/!{sample}_results.txt | awk '{print "sample\\t" $0}' > kleborate/!{sample}_results.tsv
-   tail -n 1 kleborate/!{sample}_results.txt | awk -v sample=!{sample} '{print sample "\\t" $0}' >> kleborate/!{sample}_results.tsv
+   python3 !{script} kleborate/!{sample}_results.txt kleborate/!{sample}_results.tsv kleborate !{sample}
   '''
}
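Note: emmtyper, kleborate, and the modules below (mlst, plasmidfinder, serotypefinder, shigatyper) all replace their per-module `echo`/`awk` header handling with the same call shape, `python3 summary_file.py <tool output> <summary tsv> <tool name> <sample>`. A minimal demonstration of the transformation the script performs, with illustrative values:

```python
# summary_file.py prefixes a 'sample' column: the literal word 'sample' on the
# header row (recognized by a tool-specific keyword) and the sample id elsewhere.
sample = 'sample01'
header_keyword = 'largest_contig'   # the marker summary_file.py looks for with kleborate

rows = ['strain\tlargest_contig\tST', 'isolate_A\t523456\tST258']
out = [('sample\t' + r) if header_keyword in r else (sample + '\t' + r) for r in rows]
print('\n'.join(out))
# sample	strain	largest_contig	ST
# sample01	isolate_A	523456	ST258
```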
$1 "," $2 "," $3 "," $4 "," $5 "," $6 "_" $7 }' | \ sort >> kraken2/!{sample}_contigs_summary_kraken2.csv ''' -} +} \ No newline at end of file diff --git a/modules/mlst.nf b/modules/mlst.nf index d802ba1..f2fd328 100644 --- a/modules/mlst.nf +++ b/modules/mlst.nf @@ -10,7 +10,7 @@ process mlst { //#UPHLICA time '10m' input: - tuple val(sample), file(contig) + tuple val(sample), file(contig), file(script) output: path "mlst/${sample}_mlst.tsv" , emit: collect @@ -28,11 +28,11 @@ process mlst { echo "Nextflow command : " >> $log_file cat .command.sh >> $log_file - echo -e "sample\\tfilename\\tmatching PubMLST scheme\\tST\\tID1\\tID2\\tID3\\tID4\\tID5\\tID6\\tID7\\tID8\\tID9\\tID10\\tID11\\tID12\\tID13\\tID14\\tID15" > mlst/!{sample}_mlst.tsv - mlst !{params.mlst_options} \ --threads !{task.cpus} \ - !{contig} | \ - awk -v sample=!{sample} '{print sample "\\t" $1 "\\t" $2 "\\t" $3 "\\t" $4 "\\t" $5 "\\t" $6 "\\t" $7 "\\t" $8 "\\t" $9 "\\t" $10 "\\t" $11 "\\t" $12 "\\t" $13 "\\t" $14 "\\t" $15 "\\t" $16 "\\t" $17 "\\t" $18}' >> mlst/!{sample}_mlst.tsv + !{contig} \ + > !{sample}_mlst.txt + + python3 !{script} !{sample}_mlst.txt mlst/!{sample}_mlst.tsv mlst !{sample} ''' } diff --git a/modules/plasmidfinder.nf b/modules/plasmidfinder.nf index 944cdbf..afad63d 100644 --- a/modules/plasmidfinder.nf +++ b/modules/plasmidfinder.nf @@ -10,11 +10,11 @@ process plasmidfinder { //#UPHLICA time '10m' input: - tuple val(sample), file(file) + tuple val(sample), file(file), file(script) output: path "plasmidfinder/${sample}/*" , emit: files - path "plasmidfinder/${sample}_plasmidfinder.tsv" , emit: collect + path "plasmidfinder/${sample}_plasmidfinder.tsv" , emit: collect, optional: true path "logs/${task.process}/${sample}.${workflow.sessionId}.log" , emit: log shell: @@ -35,7 +35,6 @@ process plasmidfinder { --extented_output \ | tee -a $log_file - head -n 1 plasmidfinder/!{sample}/results_tab.tsv | awk '{print "sample\\t" $0 }' > plasmidfinder/!{sample}_plasmidfinder.tsv - tail -n +2 plasmidfinder/!{sample}/results_tab.tsv | awk -v sample=!{sample} '{print sample "\\t" $0 }' >> plasmidfinder/!{sample}_plasmidfinder.tsv + python3 !{script} plasmidfinder/!{sample}/results_tab.tsv plasmidfinder/!{sample}_plasmidfinder.tsv plasmidfinder !{sample} ''' } diff --git a/modules/seqsero2.nf b/modules/seqsero2.nf index c7562fa..c6a460c 100644 --- a/modules/seqsero2.nf +++ b/modules/seqsero2.nf @@ -42,22 +42,25 @@ process seqsero2 { -n !{sample} \ | tee -a $log_file - enteritidis_check=$(grep "Enteritidis" seqsero2/!{sample}/SeqSero_result.tsv | head -n 1) - sdf_check=$(grep "Detected Sdf" seqsero2/!{sample}/SeqSero_result.tsv | head -n 1 ) - - if [ -n "$enteritidis_check" ] && [ -n "$sdf_check" ] - then - head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp - tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf+)") ; print $0}' >> SeqSero_result.tsv.tmp - mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv - elif [ -n "$enteritidis_check" ] && [ -z "$sdf_check" ] + if [ -f "seqsero2/!{sample}/SeqSero_result.tsv" ] then - head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp - tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf-)") ; print $0}' >> SeqSero_result.tsv.tmp - mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv - fi + enteritidis_check=$(grep "Enteritidis" seqsero2/!{sample}/SeqSero_result.tsv | head -n 1) + sdf_check=$(grep "Detected Sdf" 
-
-   if [ -n "$enteritidis_check" ] && [ -n "$sdf_check" ]
-   then
-     head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp
-     tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf+)") ; print $0}' >> SeqSero_result.tsv.tmp
-     mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
-   elif [ -n "$enteritidis_check" ] && [ -z "$sdf_check" ]
+   if [ -f "seqsero2/!{sample}/SeqSero_result.tsv" ]
    then
-     head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp
-     tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf-)") ; print $0}' >> SeqSero_result.tsv.tmp
-     mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
-   fi
+     enteritidis_check=$(grep "Enteritidis" seqsero2/!{sample}/SeqSero_result.tsv | head -n 1)
+     sdf_check=$(grep "Detected Sdf" seqsero2/!{sample}/SeqSero_result.tsv | head -n 1 )
 
-   cat seqsero2/!{sample}/SeqSero_result.tsv | sed 's/Sample name/sample/g' > seqsero2/!{sample}/SeqSero_result.tsv.tmp
-   mv seqsero2/!{sample}/SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
+     if [ -n "$enteritidis_check" ] && [ -n "$sdf_check" ]
+     then
+       head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp
+       tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf+)") ; print $0}' >> SeqSero_result.tsv.tmp
+       mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
+     elif [ -n "$enteritidis_check" ] && [ -z "$sdf_check" ]
+     then
+       head -n 1 seqsero2/!{sample}/SeqSero_result.tsv > SeqSero_result.tsv.tmp
+       tail -n 1 seqsero2/!{sample}/SeqSero_result.tsv | awk -F "\\t" -v OFS='\t' '{($9 = $9 " (Sdf-)") ; print $0}' >> SeqSero_result.tsv.tmp
+       mv SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
+     fi
+
+     cat seqsero2/!{sample}/SeqSero_result.tsv | sed 's/Sample name/sample/g' > seqsero2/!{sample}/SeqSero_result.tsv.tmp
+     mv seqsero2/!{sample}/SeqSero_result.tsv.tmp seqsero2/!{sample}/SeqSero_result.tsv
+   fi
   '''
}
diff --git a/modules/serotypefinder.nf b/modules/serotypefinder.nf
index 4d58cb1..af3f37c 100644
--- a/modules/serotypefinder.nf
+++ b/modules/serotypefinder.nf
@@ -14,11 +14,11 @@ process serotypefinder {
   flag =~ 'found'
 
   input:
-  tuple val(sample), file(file), val(flag)
+  tuple val(sample), file(file), val(flag), file(script)
 
   output:
   path "serotypefinder/${sample}/*"                              , emit: files
-  path "serotypefinder/${sample}_serotypefinder.tsv"             , emit: collect
+  path "serotypefinder/${sample}_serotypefinder.tsv"             , emit: collect, optional: true
   path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log
 
   shell:
@@ -40,7 +40,6 @@ process serotypefinder {
 
    cp serotypefinder/!{sample}/results_tab.tsv serotypefinder/!{sample}_serotypefinder.tsv
 
-   head -n 1 serotypefinder/!{sample}/results_tab.tsv | awk '{print "sample\\t" $0 }' > serotypefinder/!{sample}_serotypefinder.tsv
-   tail -n +2 serotypefinder/!{sample}/results_tab.tsv | awk -v sample=!{sample} '{print sample "\\t" $0 }' >> serotypefinder/!{sample}_serotypefinder.tsv
+   python3 !{script} serotypefinder/!{sample}/results_tab.tsv serotypefinder/!{sample}_serotypefinder.tsv serotypefinder !{sample}
   '''
}
diff --git a/modules/shigatyper.nf b/modules/shigatyper.nf
index 92b9183..18e667d 100644
--- a/modules/shigatyper.nf
+++ b/modules/shigatyper.nf
@@ -15,10 +15,10 @@ process shigatyper {
   flag =~ 'found'
 
   input:
-  tuple val(sample), file(input), val(flag)
+  tuple val(sample), file(input), val(flag), file(script)
 
   output:
-  path "shigatyper/${sample}_shigatyper.tsv"     , emit: files
+  path "shigatyper/${sample}_shigatyper.tsv"     , optional: true, emit: files
   path "shigatyper/${sample}_shigatyper-hits.tsv", optional: true, emit: collect
   path "logs/${task.process}/${sample}.${workflow.sessionId}.log", emit: log
 
@@ -39,14 +39,8 @@ process shigatyper {
      --name !{sample} \
      | tee -a $log_file
 
-   hits=$(find . -iname "*hits.tsv" | head -n 1)
-iname "*hits.tsv" | head -n 1) - if [ -f "$hits" ] - then - head -n 1 $hits | awk '{print "sample\\t" $0}' > shigatyper/!{sample}_shigatyper-hits.tsv - tail -n +2 $hits | awk -v sample=!{sample} '{print sample "\\t" $0}' >> shigatyper/!{sample}_shigatyper-hits.tsv - rm $hits - fi + python3 !{script} !{sample}-hits.tsv shigatyper/!{sample}_shigatyper-hits.tsv shigatyper !{sample} - cat *tsv > shigatyper/!{sample}_shigatyper.tsv + if [ -f "!{sample}.tsv" ] ; then cp !{sample}.tsv shigatyper/!{sample}_shigatyper.tsv ; fi ''' } diff --git a/modules/snp-dists.nf b/modules/snp-dists.nf index 3728298..fd7edf5 100644 --- a/modules/snp-dists.nf +++ b/modules/snp-dists.nf @@ -36,9 +36,9 @@ process snp_dists { genome_length=$(cat !{contigs} | tr "\n" ";" | sed 's/>[^>]*//2g' | tr ";" "\n" | grep -v ">" | wc -c ) - sed '0,/,/s/,/num_samples=!{num_samples};num_core_genes=!{num_core_genes};core_genome_length=$genome_length,/' snp-dists/snp_matrix.txt > snp-dists/snp_matrix_with_qc.txt + sed '0,/,/s/,/num_samples=!{num_samples};num_core_genes=!{num_core_genes},/' snp-dists/snp_matrix.txt > snp-dists/snp_matrix_with_qc.txt - echo "num_samples,num_core_genes,core_genome_length" > snp-dists/roary_metrics_mqc.csv - echo "!{num_samples},!{num_core_genes},$genome_length" >> snp-dists/roary_metrics_mqc.csv + echo "num_samples,num_core_genes,core_genome_length" > snp-dists/roary_metrics_mqc.csv + echo "!{num_samples},!{num_core_genes},${genome_length}" >> snp-dists/roary_metrics_mqc.csv ''' } diff --git a/nextflow.config b/nextflow.config index 96c7826..f1ed574 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,7 +3,7 @@ manifest { author = 'Erin Young' homePage = 'https://github.com/UPHL-BioNGS/Grandeur' mainScript = 'grandeur.nf' - version = '3.2.20230718' + version = '3.2.20230803' defaultBranch = 'main' description = 'Grandeur is short-read de novo assembly pipeline with serotyping.' 
diff --git a/subworkflows/average_nucleotide_identity.nf b/subworkflows/average_nucleotide_identity.nf
index 3372363..160cf61 100644
--- a/subworkflows/average_nucleotide_identity.nf
+++ b/subworkflows/average_nucleotide_identity.nf
@@ -10,6 +10,7 @@ workflow average_nucleotide_identity {
    ch_contigs
    ch_static_fastani_genomes
    ch_genome_ref
+   dataset_script
 
  main:
    if ( params.current_datasets ) {
@@ -20,7 +21,7 @@ workflow average_nucleotide_identity {
        .map(it -> it.trim())
        .set{ ch_species_list }
 
-     datasets_summary(ch_species_list)
+     datasets_summary(ch_species_list.combine(dataset_script))
      datasets_download(datasets_summary.out.genomes.collect(), ch_genome_ref)
 
      ch_fastani_db = datasets_download.out.genomes
diff --git a/subworkflows/information.nf b/subworkflows/information.nf
index 7502c08..180d1e7 100644
--- a/subworkflows/information.nf
+++ b/subworkflows/information.nf
@@ -20,6 +20,7 @@ workflow information {
    ch_contigs
    ch_flag
    ch_size
+   summfle_script
 
  main:
    // fastq files
@@ -27,9 +28,9 @@ workflow information {
    fastqc(ch_reads)
 
    // contigs
-   mlst(ch_contigs)
+   mlst(ch_contigs.combine(summfle_script))
    quast(ch_contigs)
-   plasmidfinder(ch_contigs)
+   plasmidfinder(ch_contigs.combine(summfle_script))
 
    // estimating size of genome for the organism
    size(ch_size.join(quast.out.results, by: 0, remainder: true).map{ it -> tuple(it[0], [ it[1], it[2], it[3], it[4], it[5], it[6], it[7], it[8]])})
 
@@ -38,13 +39,13 @@ workflow information {
    flag(ch_flag.groupTuple())
 
    amrfinderplus(ch_contigs.join(flag.out.organism, by:0))
-   emmtyper(ch_contigs.join(flag.out.strepa_flag, by:0))
+   emmtyper(ch_contigs.join(flag.out.strepa_flag, by:0).combine(summfle_script))
    //kaptive(ch_contigs.join(flag.out.klebacin_flag, by:0))
-   kleborate(ch_contigs.join(flag.out.klebsiella_flag, by:0))
+   kleborate(ch_contigs.join(flag.out.klebsiella_flag, by:0).combine(summfle_script))
    legsta(ch_contigs.join(flag.out.legionella_flag, by:0))
    seqsero2(ch_contigs.join(flag.out.salmonella_flag, by:0))
-   serotypefinder(ch_contigs.join(flag.out.ecoli_flag, by:0))
-   shigatyper(ch_contigs.join(flag.out.ecoli_flag, by:0))
+   serotypefinder(ch_contigs.join(flag.out.ecoli_flag, by:0).combine(summfle_script))
+   shigatyper(ch_contigs.join(flag.out.ecoli_flag, by:0).combine(summfle_script))
    pbptyper(ch_contigs.join(flag.out.streppneu_flag, by:0))
 
    emmtyper.out.collect
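Note: the subworkflow changes all follow one pattern, `.combine(script_channel)`, which crosses each sample tuple with the single script path so every task stages its own copy of the helper script. Nextflow's `combine` is a Cartesian product; a rough Python analogy with illustrative data, not pipeline code:

```python
# Rough analogy for ch_contigs.combine(summfle_script): a Cartesian product
# that appends the lone script path to every sample tuple.
from itertools import product

ch_contigs = [('sample1', 'sample1.fa'), ('sample2', 'sample2.fa')]
summfle_script = ['bin/summary_file.py']

combined = [(*sample, script) for sample, script in product(ch_contigs, summfle_script)]
print(combined)
# [('sample1', 'sample1.fa', 'bin/summary_file.py'),
#  ('sample2', 'sample2.fa', 'bin/summary_file.py')]
```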