Skip to content

Commit

Permalink
Merge pull request #82 from sanger-tol/update_higlass
Browse files Browse the repository at this point in the history
Configure structure of HiGlass ingress directory
  • Loading branch information
BethYates authored Oct 10, 2023
2 parents f0b6da8 + 48effa5 commit f999acb
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 14 deletions.
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ process {
publishDir = [
path: { "${params.outdir}/contact_maps" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : "${params.assembly}_" + filename }
]
}

Expand Down
4 changes: 3 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ params {

// Input data for genome_metadata subworkflow
assembly = 'GCA_946965045.2'
species = 'Epithemia_sp._CRS-2021b'
taxon_id = '2809013'
bioproject = 'PRJEB56202'
biosample = 'SAMEA10835113'
Expand All @@ -37,8 +38,9 @@ params {

// HiGlass Options
upload_higlass_data = false
higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load"
higlass_deployment_name = "higlass-app-genome-note"
higlass_namespace = "tol-higlass-genome-note"
higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s"
higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load"
higlass_data_project_dir = "/asg/algae"
}
10 changes: 6 additions & 4 deletions conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,11 @@ params {
lineage_db = "/lustre/scratch123/tol/resources/busco/v5"

// Input data for genome_metadata subworkflow
assembly = 'GCA_946965045.2'
taxon_id = '2809013'
bioproject = 'PRJEB56202'
biosample = 'SAMEA10835113'
assembly = 'GCA_934047225.1'
species = 'Ypsolopha_sequella'
taxon_id = '1870436'
bioproject = 'PRJEB51790'
biosample = 'SAMEA7519929'

// Genome Notes Portal
write_to_portal = true
Expand All @@ -38,6 +39,7 @@ params {
// HiGlass Options
upload_higlass_data = true
higlass_upload_directory = "/lustre/scratch123/tol/share/genome-note-higlass/data_to_load"
higlass_data_project_dir = "/darwin/insects"
higlass_deployment_name = "higlass-app-genome-note"
higlass_namespace = "tol-higlass-genome-note"
higlass_kubeconfig = "~/.kube/config.tol-it-dev-k8s"
Expand Down
11 changes: 8 additions & 3 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ The pipeline also collates (1) assembly metadata from ENA, NCBI and GoaT (2) ass

## Genome metadata input

You will need to supply the assembly accession for the genome you would like to analyse along with the bioproject accession and the biosample acession linked to this genome assembly.
You will need to supply the assembly accession for the genome you would like to analyse along with the species name, taxon_id, bioproject accession and the biosample accession linked to this genome assembly.

```bash
--assembly '[assembly accession]'
--species '[species name]'
--taxon_id '[taxon id]'
--bioproject '[bioproject accession]'
--biosample '[biosample accession]'
```
Expand All @@ -38,6 +40,7 @@ If you wish to run the optional step that writes the .mcool and .genome files pr
```bash
--upload_higlass_data 'true'
--higlass_upload_directory '[Path to ingress directory for kubernetes]'
--higlass_data_project_dir '[Directory structure to be used for Higlass data, suggestion is to use /<project-name>/<taxon-group>]'
--higlass_deployment_name '[Name of Higlass Deployment in Kubernetes]'
--higlass_namespace '[Name of the namespace used for Higlass Deployment in Kubernetes]'
--higlass_kubeconfig '[path to kubeconfig file]'
Expand Down Expand Up @@ -86,7 +89,7 @@ An [example samplesheet](https://raw.githubusercontent.com/sanger-tol/genomenote
The typical command for running the pipeline is as follows:

```bash
nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir <OUTDIR> --fasta genome.fasta --assembly GCA_922984935.2 --bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker
nextflow run sanger-tol/genomenote --input samplesheet.csv --outdir <OUTDIR> --fasta genome.fasta --assembly GCA_922984935.2 --species Meles_meles --taxon_id 9662 --bioproject PRJEB49353 --biosample SAMEA7524400 -profile docker
```

This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
Expand Down Expand Up @@ -119,8 +122,10 @@ outdir: './results/'
fasta: './genome.fasta'
input: 'data'
assembly: 'GCA_922984935.2'
species: 'Meles_meles'
taxon_id: '9662'
bioproject: 'PRJEB49353'
biosample" 'SAMEA7524400'
biosample: 'SAMEA7524400'
<...>
```

Expand Down
14 changes: 10 additions & 4 deletions modules/local/upload_higlass_data.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ process UPLOAD_HIGLASS_DATA {
input:
tuple val(meta), path(mcool)
tuple val(meta2), path(genome)
val(species)
val(assembly)
val(higlass_data_project_dir)
path(upload_dir)

output:
Expand All @@ -22,6 +24,9 @@ process UPLOAD_HIGLASS_DATA {
error "UPLOAD_HIGLASS_DATA modules do not support Conda. Please use Docker / Singularity / Podman instead."
}

def project_name = "${higlass_data_project_dir}/${species.replaceAll('\\s','_')}/${assembly}"
def file_name = "${assembly}_${meta.id}"

"""
# Configure kubectl access to the namespace
export KUBECONFIG=$params.higlass_kubeconfig
Expand All @@ -35,14 +40,15 @@ process UPLOAD_HIGLASS_DATA {
echo "\$pod_name"
# Copy the files to the upload area
cp -f $mcool $upload_dir
cp -f $genome $upload_dir/${genome.baseName}.genome
mkdir -p ${upload_dir}${project_name}
cp -f $mcool ${upload_dir}${project_name}/${file_name}.mcool
cp -f $genome ${upload_dir}${project_name}/${file_name}.genome
# Load them in Kubernetes
echo "Loading .mcool file"
kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/$mcool.name --filetype cooler --datatype matrix --project-name $assembly --name ${assembly}_map
kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${project_name}/${file_name}.mcool --filetype cooler --datatype matrix --project-name ${project_name} --name ${file_name}_map
echo "Loading .genome file"
kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${genome.baseName}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name $assembly --name ${assembly}_grid
kubectl exec \$pod_name -- python /home/higlass/projects/higlass-server/manage.py ingest_tileset --filename /higlass-temp/${project_name}/${file_name}.genome --filetype chromsizes.tsv --datatype chromsizes --coordSystem ${assembly}_assembly --project-name ${project_name} --name ${file_name}_grid
echo "done"
cat <<-END_VERSIONS > versions.yml
Expand Down
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ params {

// Metadata
assembly = null
species = null
taxon_id = null
bioproject = null
biosample = null
Expand All @@ -27,6 +28,7 @@ params {
// HiGlass options
upload_higlass_data = false
higlass_upload_directory = null
higlass_data_project_dir = null
higlass_kubeconfig = null
higlass_deployment_name = null
higlass_namespace = null
Expand Down
11 changes: 11 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
"type": "string",
"description": "The Genbank assembly accession for the assembly, for example: GCA_922984935.2."
},
"species": {
"type": "string",
"description": "The species name for the assembly with spaces replaced with '_', for example: Epithemia_sp._CRS-2021b."
},
"taxon_id": {
"type": "string",
"description": "The NCBI taxonomy ID corresponding to the GCA assembly accession, for example: 9662."
Expand Down Expand Up @@ -93,6 +97,13 @@
"fa_icon": "fas fa-folder-open",
"hidden": true
},
"higlass_data_project_dir": {
"type": "string",
"format": "directory-path",
"description": "Subdirectory structure to use for organising HiGlass data, suggested format is /<project_name>/<taxon_group>, e.g. '/asg/algae'",
"fa_icon": "fas fa-folder-open",
"hidden": true
},
"higlass_kubeconfig": {
"type": "string",
"format": "file-path",
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/contact_maps.nf
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ workflow CONTACT_MAPS {
// Optionally add the files to a HiGlass webserver

if ( params.upload_higlass_data ) {
UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.assembly, params.higlass_upload_directory )
UPLOAD_HIGLASS_DATA (COOLER_ZOOMIFY.out.mcool, COOLER_DUMP.out.bedpe, params.species, params.assembly, params.higlass_data_project_dir, params.higlass_upload_directory )
ch_versions = ch_versions.mix ( UPLOAD_HIGLASS_DATA.out.versions.first() )
}

Expand Down

0 comments on commit f999acb

Please sign in to comment.