From 8f5a4af08d482fe1907b24157602b151994becce Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 16 Oct 2023 16:11:54 -0700 Subject: [PATCH] Add support for samplesheet input (#54) * Add support for samplesheet input * Update README --- README.md | 20 +++++++++++++++++++- conf/illumina.config | 4 ++++ main.nf | 16 ++++++++++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index c2ed82c5..70d71723 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This Nextflow pipeline automates the ARTIC network [nCoV-2019 novel coronavirus ##### Illumina ``` -nextflow run BCCDC-PHL/ncov2019-artic-nf [-profile conda,singularity,docker,slurm,lsf] \ +nextflow run BCCDC-PHL/ncov2019-artic-nf -profile conda \ --illumina --prefix "output_file_prefix" \ --primer_pairs_tsv /path/to/primer_pairs_tsv \ --composite_ref /path/to/human_and_sars-cov-2_composite_ref \ @@ -27,6 +27,24 @@ For production use at large scale, where you will run the workflow many times, y Alternatively you can avoid just the cloning of the scheme repository to remain on a fixed revision of it over time by passing --schemeRepoURL /path/to/own/clone/of/github.com/artic-network/artic-ncov2019. This removes any internet access from the workflow except for the optional upload steps. +###### SampleSheet Input + +Samples can also be provided to the pipeline via a `samplesheet.csv` file: + +``` +nextflow run BCCDC-PHL/ncov2019-artic-nf -profile conda \ + --illumina --prefix "output_file_prefix" \ + --primer_pairs_tsv /path/to/primer_pairs_tsv \ + --composite_ref /path/to/human_and_sars-cov-2_composite_ref \ + --samplesheet_input /path/to/samplesheet.csv +``` + +The `samplesheet.csv` file must include the headers: + +`ID,R1,R2` + +...and each record should be a comma-separated line consisting of the sample ID, the path to the R1 fastq file for that sample, and the path to the R2 fastq file for that sample. + ##### Nanopore ###### Nanopolish diff --git a/conf/illumina.config b/conf/illumina.config index f7826afe..5f304a31 100644 --- a/conf/illumina.config +++ b/conf/illumina.config @@ -20,6 +20,10 @@ params { fastq_exts = ['.fastq.gz', '.fq.gz'] fastqSearchPath = makeFastqSearchPath( params.illuminaSuffixes, params.fastq_exts ) + + // Provide sample ID and fastq paths via a samplesheet.csv with fields: + // ID,R1,R2 + samplesheet_input = 'NO_FILE' // Use cram input instead of fastq files cram = false diff --git a/main.nf b/main.nf index c6d6b332..ffcacce6 100644 --- a/main.nf +++ b/main.nf @@ -22,8 +22,9 @@ if (params.profile){ } if ( params.illumina ) { - if ( !params.directory ) { + if ( !params.directory && params.samplesheet_input == "NO_FILE" ) { println("Please supply a directory containing fastqs or CRAMs with --directory. Specify --cram if supplying a CRAMs directory") + println("Or provide a samplesheet (headers: ID,R1,R2) with --samplesheet_input") println("Use --help to print help") System.exit(1) } @@ -78,9 +79,16 @@ workflow { .set{ ch_cramFiles } } else { - Channel.fromFilePairs( params.fastqSearchPath, flat: true) - .filter{ !( it[0] =~ /Undetermined/ ) } - .set{ ch_filePairs } + if ( params.samplesheet_input != "NO_FILE" ) { + Channel.fromPath(params.samplesheet_input) + .splitCsv(header: true).map{ it -> [it['ID'], it['R1'], it['R2']] } + .set{ ch_filePairs } + } + else { + Channel.fromFilePairs( params.fastqSearchPath, flat: true) + .filter{ !( it[0] =~ /Undetermined/ ) } + .set{ ch_filePairs } + } } } else {