From 33619f85863d3755825577e53539fb3cc1a8962c Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Thu, 21 Sep 2023 17:40:09 +0100 Subject: [PATCH 1/7] Configure structure of higlass ingress directory --- conf/test.config | 2 ++ conf/test_full.config | 10 ++++++---- docs/usage.md | 11 ++++++++--- modules/local/upload_higlass_data.nf | 12 ++++++++---- nextflow.config | 2 ++ nextflow_schema.json | 11 +++++++++++ subworkflows/local/contact_maps.nf | 2 +- 7 files changed, 38 insertions(+), 12 deletions(-) diff --git a/conf/test.config b/conf/test.config index 3c33c0b0..02a76179 100644 --- a/conf/test.config +++ b/conf/test.config @@ -27,6 +27,7 @@ params { // Input data for genome_metadata subworkflow assembly = 'GCA_946965045.2' + species = 'Epithemia_sp._CRS-2021b' taxon_id = '2809013' bioproject = 'PRJEB56202' biosample = 'SAMEA10835113' @@ -38,6 +39,7 @@ params { // HiGlass Options upload_higlass_data = false higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load" + higlass_data_basedir = "/asg/algae" higlass_deployment_name = "higlass-app-genome-note" higlass_namespace = "tol-higlass-genome-note" higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s" diff --git a/conf/test_full.config b/conf/test_full.config index d9b70fae..598779a4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -26,10 +26,11 @@ params { lineage_db = "/lustre/scratch123/tol/resources/busco/v5" // Input data for genome_metadata subworkflow - assembly = 'GCA_946965045.2' - taxon_id = '2809013' - bioproject = 'PRJEB56202' - biosample = 'SAMEA10835113' + assembly = 'GCA_934047225.1' + species = 'Ypsolopha_sequella' + taxon_id = '1870436' + bioproject = 'PRJEB51790' + biosample = 'SAMEA7519929' // Genome Notes Portal write_to_portal = true @@ -38,6 +39,7 @@ params { // HiGlass Options upload_higlass_data = true higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load" + higlass_data_basedir = "/darwin/insects" 
higlass_deployment_name = "higlass-app-genome-note" higlass_namespace = "tol-higlass-genome-note" higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s" diff --git a/docs/usage.md b/docs/usage.md index 8723f6d7..28ef9928 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -12,10 +12,12 @@ The pipeline also collates (1) assembly metadata from ENA, NCBI and GoaT (2) ass ## Genome metadata input -You will need to supply the assembly accession for the genome you would like to analyse along with the bioproject accession and the biosample acession linked to this genome assembly. +You will need to supply the assembly accession for the genome you would like to analyse along with the species name, taxon_id, bioproject accession and the biosample accession linked to this genome assembly. ```bash --assembly '[assembly accession]' + --species '[species name]' + --taxon_id '[taxon id]' --bioproject '[bioproject accession]' --biosample '[biosample accession]' ``` @@ -38,6 +40,7 @@ If you wish to run the optional step that writes the .mcool and .genome files pr ```bash --upload_higlass_data 'true' --higlass_upload_directory '[Path to ingress directory for kubernetes]' + --higlass_data_basedir '[Directory structure to be used for Higlass data, suggestions is to use //]' --higlass_deployment_name '[ Name of Higlass Deployment in kubernetes]' --higlass_namespace '[Name of the namespace used for Higlass Deployment in Kubernetes]' --higlass_kubeconfig '[path to kubeconfig file]' @@ -86,7 +89,7 @@ An [example samplesheet](https://raw.githubusercontent.com/sanger-tol/genomenote The typical command for running the pipeline is as follows: ```bash -nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir --fasta genome.fasta --assembly GCA_922984935.2 --bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker +nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir --fasta genome.fasta --assembly GCA_922984935.2 --species Epithemia_sp._CRS-2021b --taxon_id 2809013 
--bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -119,8 +122,10 @@ outdir: './results/' fasta: './genome.fasta' input: 'data' assembly: 'GCA_922984935.2' +species: 'Epithemia_sp._CRS-2021b' +taxon_id: '2809013' bioproject: 'PRJEB49353' -biosample" 'SAMEA7524400' +biosample: 'SAMEA7524400' <...> ``` diff --git a/modules/local/upload_higlass_data.nf b/modules/local/upload_higlass_data.nf index fd4ed182..165bc77b 100644 --- a/modules/local/upload_higlass_data.nf +++ b/modules/local/upload_higlass_data.nf @@ -7,6 +7,8 @@ process UPLOAD_HIGLASS_DATA { input: tuple val(meta), path(mcool) tuple val(meta2), path(genome) + val(higlass_data_basedir) + val(species) val(assembly) path(upload_dir) @@ -21,6 +23,7 @@ process UPLOAD_HIGLASS_DATA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "UPLOAD_HIGLASS_DATA modules do not support Conda. Please use Docker / Singularity / Podman instead." 
} + """ # Configure kubectl access to the namespace @@ -35,14 +38,15 @@ process UPLOAD_HIGLASS_DATA { echo "\$pod_name" # Copy the files to the upload area - cp -f $mcool $upload_dir - cp -f $genome $upload_dir/${genome.baseName}.genome + mkdir -p $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")} + cp -f $mcool $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.mcool + cp -f $genome $upload_dir/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.genome # Load them in Kubernetes echo "Loading .mcool file" - kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/$mcool.name --filetype cooler --datatype matrix --project-name $assembly --name ${assembly}_map + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.mcool --filetype cooler --datatype matrix --project-name $assembly --name ${assembly}_map echo "Loading .genome file" - kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${genome.baseName}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name $assembly --name ${assembly}_grid + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name $assembly --name ${assembly}_grid echo "done" cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index f826212b..373bc324 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,7 @@ params { // Metadata assembly = null + species = null taxon_id = null bioproject = null 
biosample = null @@ -27,6 +28,7 @@ params { // HiGlass options upload_higlass_data = false higlass_upload_directory = null + higlass_data_basedir = null higlass_kubeconfig = null higlass_deployment_name = null higlass_namespace = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e8e2ea22..120e8921 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -36,6 +36,10 @@ "type": "string", "description": "The Genbank assembly accession for the assembly, for example: GCA_922984935.2." }, + "species": { + "type": "string", + "description": "The species name for the assembly with spaces replaced with '_', for example: Epithemia_sp._CRS-2021b." + }, "taxon_id": { "type": "string", "description": "The NCBI taxonomy ID corresponding to the GCA assembly accession, for example: 9662." @@ -93,6 +97,13 @@ "fa_icon": "fas fa-folder-open", "hidden": true }, + "higlass_data_basedir": { + "type": "string", + "format": "directory-path", + "description": "Subdirectory struture to use for organising HiGlass data, suggested format is / e.g. 
'/asg/algae'", + "fa_icon": "fas fa-folder-open", + "hidden": true + }, "higlass_kubeconfig": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/contact_maps.nf b/subworkflows/local/contact_maps.nf index d72459b4..be938b0f 100644 --- a/subworkflows/local/contact_maps.nf +++ b/subworkflows/local/contact_maps.nf @@ -99,7 +99,7 @@ workflow CONTACT_MAPS { // Optionally add the files to a HiGlass webserver if ( params.upload_higlass_data ) { - UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.assembly, params.higlass_upload_directory ) + UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.higlass_data_basedir, params.species, params.assembly, params.higlass_upload_directory ) ch_versions = ch_versions.mix ( UPLOAD_HIGLASS_DATA.out.versions.first() ) } From 27e28b2c69d716231528a39b42c8ef1a88d3de7b Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Thu, 21 Sep 2023 17:44:00 +0100 Subject: [PATCH 2/7] fixed linting --- modules/local/upload_higlass_data.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/local/upload_higlass_data.nf b/modules/local/upload_higlass_data.nf index 165bc77b..ea47ac91 100644 --- a/modules/local/upload_higlass_data.nf +++ b/modules/local/upload_higlass_data.nf @@ -23,7 +23,6 @@ process UPLOAD_HIGLASS_DATA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { error "UPLOAD_HIGLASS_DATA modules do not support Conda. Please use Docker / Singularity / Podman instead." 
} - """ # Configure kubectl access to the namespace From f33de3da551764daf7528b6fc463400aebc447c8 Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Fri, 22 Sep 2023 17:05:33 +0100 Subject: [PATCH 3/7] change file name on higlass --- modules/local/upload_higlass_data.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/local/upload_higlass_data.nf b/modules/local/upload_higlass_data.nf index ea47ac91..bd0b174a 100644 --- a/modules/local/upload_higlass_data.nf +++ b/modules/local/upload_higlass_data.nf @@ -37,15 +37,15 @@ process UPLOAD_HIGLASS_DATA { echo "\$pod_name" # Copy the files to the upload area - mkdir -p $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")} - cp -f $mcool $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.mcool - cp -f $genome $upload_dir/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.genome + mkdir -p $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly} + cp -f $mcool $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.mcool + cp -f $genome $upload_dir/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.genome # Load them in Kubernetes echo "Loading .mcool file" - kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.mcool --filetype cooler --datatype matrix --project-name $assembly --name ${assembly}_map + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.mcool --filetype cooler --datatype matrix --project-name ${higlass_data_basedir}/${species.replaceAll("\\s","_")}/$assembly --name ${assembly}_map echo "Loading .genome file" - kubectl exec \$pod_name -- 
python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name $assembly --name ${assembly}_grid + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name ${higlass_data_basedir}/${species.replaceAll("\\s","_")}/$assembly --name ${assembly}_grid echo "done" cat <<-END_VERSIONS > versions.yml From 3b1008d52cdd868501b0dc6b4a05a8838b7d8fca Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Wed, 4 Oct 2023 16:13:44 +0100 Subject: [PATCH 4/7] introduced variables and renamed params to make code easier to maintain --- conf/test.config | 4 ++-- conf/test_full.config | 2 +- docs/usage.md | 2 +- modules/local/upload_higlass_data.nf | 17 ++++++++++------- nextflow.config | 2 +- nextflow_schema.json | 2 +- subworkflows/local/contact_maps.nf | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/conf/test.config b/conf/test.config index 02a76179..2f70fb4d 100644 --- a/conf/test.config +++ b/conf/test.config @@ -38,9 +38,9 @@ params { // HiGlass Options upload_higlass_data = false - higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load" - higlass_data_basedir = "/asg/algae" higlass_deployment_name = "higlass-app-genome-note" higlass_namespace = "tol-higlass-genome-note" higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s" + higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load" + higlass_data_project_dir = "/asg/algae" } diff --git a/conf/test_full.config b/conf/test_full.config index 598779a4..caf4febc 100644 --- a/conf/test_full.config +++ 
b/conf/test_full.config @@ -39,7 +39,7 @@ params { // HiGlass Options upload_higlass_data = true higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load" - higlass_data_basedir = "/darwin/insects" + higlass_data_project_dir = "/darwin/insects" higlass_deployment_name = "higlass-app-genome-note" higlass_namespace = "tol-higlass-genome-note" higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s" diff --git a/docs/usage.md b/docs/usage.md index 28ef9928..e2fc612b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -40,7 +40,7 @@ If you wish to run the optional step that writes the .mcool and .genome files pr ```bash --upload_higlass_data 'true' --higlass_upload_directory '[Path to ingress directory for kubernetes]' - --higlass_data_basedir '[Directory structure to be used for Higlass data, suggestions is to use //]' + --higlass_data_project_dir '[Directory structure to be used for Higlass data, suggestions is to use //]' --higlass_deployment_name '[ Name of Higlass Deployment in kubernetes]' --higlass_namespace '[Name of the namespace used for Higlass Deployment in Kubernetes]' --higlass_kubeconfig '[path to kubeconfig file]' diff --git a/modules/local/upload_higlass_data.nf b/modules/local/upload_higlass_data.nf index bd0b174a..01728ef7 100644 --- a/modules/local/upload_higlass_data.nf +++ b/modules/local/upload_higlass_data.nf @@ -7,9 +7,9 @@ process UPLOAD_HIGLASS_DATA { input: tuple val(meta), path(mcool) tuple val(meta2), path(genome) - val(higlass_data_basedir) val(species) val(assembly) + val(higlass_data_project_dir) path(upload_dir) output: @@ -24,6 +24,9 @@ process UPLOAD_HIGLASS_DATA { error "UPLOAD_HIGLASS_DATA modules do not support Conda. Please use Docker / Singularity / Podman instead." 
} + def project_name = "${higlass_data_project_dir}/${species.replaceAll('\\s','_')}/${assembly}" + def file_name = "${assembly}_${meta.id}" + """ # Configure kubectl access to the namespace export KUBECONFIG=$params.higlass_kubeconfig @@ -37,20 +40,20 @@ process UPLOAD_HIGLASS_DATA { echo "\$pod_name" # Copy the files to the upload area - mkdir -p $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly} - cp -f $mcool $upload_dir${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.mcool - cp -f $genome $upload_dir/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.genome + mkdir -p ${upload_dir}${project_name} + cp -f $mcool ${upload_dir}${project_name}/${file_name}.mcool + cp -f $genome ${upload_dir}${project_name}/${file_name}.genome # Load them in Kubernetes echo "Loading .mcool file" - kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.mcool --filetype cooler --datatype matrix --project-name ${higlass_data_basedir}/${species.replaceAll("\\s","_")}/$assembly --name ${assembly}_map + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${project_name}/${file_name}.mcool --filetype cooler --datatype matrix --project-name ${project_name} --name ${file_name}_map echo "Loading .genome file" - kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${higlass_data_basedir}/${species.replaceAll("\\s","_")}/${assembly}/${assembly}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name ${higlass_data_basedir}/${species.replaceAll("\\s","_")}/$assembly --name ${assembly}_grid + kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py 
ingest_tileset --filename /higlass-temp/${project_name}/${file_name}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name ${project_name} --name ${file_name}_grid echo "done" cat <<-END_VERSIONS > versions.yml "${task.process}": - kubectl: \$(kubectl version --output=json | jq -r ".clientVersion.gitVersion") + kubectl: \$(kubectl version --output=json | jq -r ".clientVersion.gitVersion") END_VERSIONS """ } diff --git a/nextflow.config b/nextflow.config index 373bc324..80c54e5e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { // HiGlass options upload_higlass_data = false higlass_upload_directory = null - higlass_data_basedir = null + higlass_data_project_dir = null higlass_kubeconfig = null higlass_deployment_name = null higlass_namespace = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 120e8921..e43b5231 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -97,7 +97,7 @@ "fa_icon": "fas fa-folder-open", "hidden": true }, - "higlass_data_basedir": { + "higlass_data_project_dir": { "type": "string", "format": "directory-path", "description": "Subdirectory struture to use for organising HiGlass data, suggested format is / e.g. 
'/asg/algae'", diff --git a/subworkflows/local/contact_maps.nf b/subworkflows/local/contact_maps.nf index be938b0f..b2df963b 100644 --- a/subworkflows/local/contact_maps.nf +++ b/subworkflows/local/contact_maps.nf @@ -99,7 +99,7 @@ workflow CONTACT_MAPS { // Optionally add the files to a HiGlass webserver if ( params.upload_higlass_data ) { - UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.higlass_data_basedir, params.species, params.assembly, params.higlass_upload_directory ) + UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.species, params.assembly, params.higlass_data_project_dir, params.higlass_upload_directory ) ch_versions = ch_versions.mix ( UPLOAD_HIGLASS_DATA.out.versions.first() ) } From cd43c032b26d745ea660e9cb9c540b1bde4d2423 Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Wed, 4 Oct 2023 16:31:59 +0100 Subject: [PATCH 5/7] Change contact map results files to have name that matches that used for upload to HiGlass --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 212273e6..0c40c8a4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -45,7 +45,7 @@ process { publishDir = [ path: { "${params.outdir}/contact_maps" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${params.assembly}_" + filename } ] } From 28833e52a81985c682413b74b20a81aaeec3bceb Mon Sep 17 00:00:00 2001 From: Bethan Yates Date: Wed, 4 Oct 2023 16:42:05 +0100 Subject: [PATCH 6/7] fixed linting --- modules/local/upload_higlass_data.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/upload_higlass_data.nf b/modules/local/upload_higlass_data.nf index 01728ef7..56283fc7 100644 --- a/modules/local/upload_higlass_data.nf +++ b/modules/local/upload_higlass_data.nf @@ -53,7 +53,7 @@ process UPLOAD_HIGLASS_DATA { cat <<-END_VERSIONS > versions.yml "${task.process}": - kubectl: \$(kubectl version --output=json | jq -r ".clientVersion.gitVersion") + kubectl: \$(kubectl version --output=json | jq -r ".clientVersion.gitVersion") END_VERSIONS """ } From 48effa5defcb371cba0ca47552a4a313836cadc1 Mon Sep 17 00:00:00 2001 From: BethYates <113996036+BethYates@users.noreply.github.com> Date: Mon, 9 Oct 2023 16:43:34 +0100 Subject: [PATCH 7/7] fix alignment Co-authored-by: Matthieu Muffato --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 80c54e5e..59d14be8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -28,7 +28,7 @@ params { // HiGlass options upload_higlass_data = false higlass_upload_directory = null - higlass_data_project_dir = null + higlass_data_project_dir = null higlass_kubeconfig = null higlass_deployment_name = null higlass_namespace = null