
Commit

Changes to fix failing tests. Refactored parsing of metadata files to use a single module.
BethYates committed Oct 10, 2023
1 parent a622254 commit c64f557
Showing 5 changed files with 73 additions and 77 deletions.
2 changes: 1 addition & 1 deletion conf/test.config
@@ -32,7 +32,7 @@ params {
biosample = 'SAMEA10835113'

// Genome Notes Portal
write_to_portal = true
write_to_portal = false
genome_notes_api = "https://notes-staging.tol.sanger.ac.uk/api/v1"

}
2 changes: 1 addition & 1 deletion conf/test_full.config
@@ -32,7 +32,7 @@ params {
biosample = 'SAMEA10835113'

// Genome Notes Portal
write_to_portal = true
write_to_portal = false
genome_notes_api = "https://notes-staging.tol.sanger.ac.uk/api/v1"

}
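Both test profiles now set write_to_portal = false, so test runs stop short of pushing notes to the staging portal at notes-staging.tol.sanger.ac.uk. A hedged sketch of how such a flag is typically consumed; WRITE_TO_GENOME_NOTES_DB and ch_params_consistent.DB come from the subworkflow further down, but the if-guard and the call's argument list here are assumptions, not code from this commit:

    // Hypothetical gating: only write to the Genome Notes portal when the
    // active config asks for it. The argument list is assumed.
    if (params.write_to_portal) {
        WRITE_TO_GENOME_NOTES_DB ( ch_params_consistent.DB )
        ch_versions = ch_versions.mix( WRITE_TO_GENOME_NOTES_DB.out.versions.first() )
    }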
11 changes: 5 additions & 6 deletions modules/local/combine_metadata.nf
@@ -1,5 +1,5 @@
process COMBINE_METADATA {
tag "${meta.id}|combine_parsed"
tag "${meta.id}"
label 'process_single'

conda "conda-forge::python=3.9.1"
@@ -8,7 +8,7 @@ process COMBINE_METADATA {
'quay.io/biocontainers/python:3.9--1' }"

input:
tuple val(meta), val(file_list)
tuple val(meta), path(file_list)

output:
tuple val (meta), path("consistent.csv") , emit: consistent
@@ -21,10 +21,9 @@ process COMBINE_METADATA {
script:
def args = []
for (item in file_list){
def meta_file = item[0]
def file = item[1]
def arg = "--${meta_file.source}_${meta_file.type}_file".toLowerCase()
args.add(arg)
def file = item
def file_name = "--" + item.getSimpleName() + "_file"
args.add(file_name)
args.add(file)
}

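The rewritten script block no longer unpacks a (meta, file) pair per entry; it derives each command-line flag from the staged file's base name. A standalone Groovy sketch of that loop, runnable with the groovy CLI, using example file names that follow the <source>_<type>.csv pattern produced by PARSE_METADATA below (getSimpleName() is approximated for plain strings):

    // Build the flag/path argument pairs the way the new script block does.
    // File names are illustrative; in the pipeline they are staged Nextflow paths.
    def file_list = ['ena_assembly.csv', 'ncbi_taxonomy.csv', 'goat_assembly.csv']
    def args = []
    for (item in file_list) {
        // path.getSimpleName() ~ base name without its extension
        def simpleName = item.tokenize('/')[-1] - ~/\.[^.]+$/
        args.add('--' + simpleName + '_file')   // e.g. --ena_assembly_file
        args.add(item)                          // followed by the file itself
    }
    assert args.join(' ') ==
        '--ena_assembly_file ena_assembly.csv --ncbi_taxonomy_file ncbi_taxonomy.csv --goat_assembly_file goat_assembly.csv'

Declaring the input as path(file_list) rather than val(file_list) also stages the parsed CSVs into the task work directory, so the command can actually read the files it is pointed at.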
35 changes: 35 additions & 0 deletions modules/local/parse_metadata.nf
@@ -0,0 +1,35 @@

process PARSE_METADATA {
tag "${meta.ext}|${meta.source}|${meta.type}"
label 'process_single'

conda "conda-forge::python=3.9.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9--1' :
'quay.io/biocontainers/python:3.9--1' }"

input:
tuple val(meta), path(json)

output:
tuple val(meta), path("${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.csv") , emit: file_path
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when


script: // This script is bundled with the pipeline, in nf-core/genomenote/bin/
def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py"
def output_file = "${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.csv"
"""
$script_name \\
$json \\
$output_file
cat <<-END_VERSIONS > versions.yml
"${task.process}":
$script_name: \$($script_name --version | cut -d' ' -f2)
END_VERSIONS
"""
}
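This single process replaces the per-source parse modules: it selects a bundled bin/ script and names its output purely from the meta map attached upstream. A small Groovy illustration of that derivation, runnable on its own; the meta values are an example combination (ENA assembly metadata fetched as JSON) rather than values taken from this diff:

    // Derive the parser script and its output CSV from the meta map,
    // exactly as the script block above interpolates them.
    def meta = [ source: 'ENA', type: 'Assembly', ext: 'json' ]   // example values
    def script_name = "parse_${meta.ext.toLowerCase()}_${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.py"
    def output_file = "${meta.source.toLowerCase()}_${meta.type.toLowerCase()}.csv"
    assert script_name == 'parse_json_ena_assembly.py'
    assert output_file == 'ena_assembly.csv'

In principle, supporting a new metadata source then means adding a parse_<ext>_<source>_<type>.py script to bin/ and a row to the file-list CSV, rather than writing a new Nextflow module.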
100 changes: 31 additions & 69 deletions subworkflows/local/genome_metadata.nf
@@ -5,14 +5,8 @@
//

include { RUN_WGET } from '../../modules/local/run_wget'
include { PARSE_ENA_ASSEMBLY } from '../../modules/local/parse_ena_assembly'
include { PARSE_ENA_BIOPROJECT } from '../../modules/local/parse_ena_bioproject'
include { PARSE_ENA_BIOSAMPLE } from '../../modules/local/parse_ena_biosample'
include { PARSE_ENA_TAXONOMY } from '../../modules/local/parse_ena_taxonomy'
include { PARSE_NCBI_ASSEMBLY } from '../../modules/local/parse_ncbi_assembly'
include { PARSE_NCBI_TAXONOMY } from '../../modules/local/parse_ncbi_taxonomy'
include { PARSE_GOAT_ASSEMBLY } from '../../modules/local/parse_goat_assembly'
include { COMBINE_METADATA } from '../../modules/local/combine_metadata'
include { PARSE_METADATA } from '../../modules/local/parse_metadata'
include { COMBINE_METADATA } from '../../modules/local/combine_metadata'
include { POPULATE_TEMPLATE } from '../../modules/local/populate_template'
include { WRITE_TO_GENOME_NOTES_DB } from '../../modules/local/write_to_database'

@@ -23,20 +17,15 @@ workflow GENOME_METADATA {

main:
ch_versions = Channel.empty()
ch_combined = Channel.empty()


def meta = [:]
meta.id = params.assembly
meta.taxon_id = params.taxon_id
ch_combined_params = Channel.of(meta)


// Define channel for RUN_WGET
ch_file_list
.splitCsv(header: ['source', 'type', 'url', 'ext'], skip: 1)
.map { row -> [
| splitCsv(header: ['source', 'type', 'url', 'ext'], skip: 1)
| map { row ->
[
// meta
[
[ id: params.assembly,
taxon_id: params.taxon_id,
source: row.source,
type: row.type,
ext: row.ext,
@@ -47,66 +36,38 @@
.replaceAll(/TAXONOMY_ID/, params.taxon_id)
.replaceAll(/BIOPROJECT_ACCESSION/, params.bioproject)
.replaceAll(/BIOSAMPLE_ACCESSION/, params.biosample)
] }
.set{file_list}
]
}
| set { file_list }

// Fetch files
RUN_WGET ( file_list )

ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() )

ch_input = RUN_WGET.out.file_path.branch {
ENA_ASSEMBLY: it[0].source == "ENA" && it[0].type == "Assembly"
ENA_BIOPROJECT: it[0].source == "ENA" && it[0].type == "Bioproject"
ENA_BIOSAMPLE: it[0].source == "ENA" && it[0].type == "Biosample"
ENA_TAXONOMY: it[0].source == "ENA" && it[0].type == "Taxonomy"
NCBI_ASSEMBLY: it[0].source == "NCBI" && it[0].type == "Assembly"
NCBI_TAXONOMY: it[0].source == "NCBI" && it[0].type == "Taxonomy"
GOAT_ASSEMBLY: it[0].source == "GOAT" && it[0].type == "Assembly"
RUN_WGET ( file_list )
ch_versions = ch_versions.mix( RUN_WGET.out.versions.first() )

PARSE_METADATA(RUN_WGET.out.file_path)
ch_versions = ch_versions.mix( PARSE_METADATA.out.versions.first() )

PARSE_METADATA.out.file_path
| map { it -> tuple( it[1] )}
| collect
| map { it ->
meta = [:]
meta.id = params.assembly
meta.taxon_id = params.taxon_id
[ meta, it ]
}
| set { ch_parsed_files }

PARSE_ENA_ASSEMBLY ( ch_input.ENA_ASSEMBLY )
ch_versions = ch_versions.mix( PARSE_ENA_ASSEMBLY.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_ENA_ASSEMBLY.out.file_path )

PARSE_ENA_BIOPROJECT ( ch_input.ENA_BIOPROJECT )
ch_versions = ch_versions.mix( PARSE_ENA_BIOPROJECT.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_ENA_BIOPROJECT.out.file_path )


PARSE_ENA_BIOSAMPLE ( ch_input.ENA_BIOSAMPLE )
ch_versions = ch_versions.mix( PARSE_ENA_BIOSAMPLE.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_ENA_BIOSAMPLE.out.file_path )


PARSE_ENA_TAXONOMY ( ch_input.ENA_TAXONOMY )
ch_versions = ch_versions.mix( PARSE_ENA_TAXONOMY.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_ENA_TAXONOMY.out.file_path )

PARSE_NCBI_ASSEMBLY ( ch_input.NCBI_ASSEMBLY )
ch_versions = ch_versions.mix( PARSE_NCBI_ASSEMBLY.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_NCBI_ASSEMBLY.out.file_path )

PARSE_NCBI_TAXONOMY ( ch_input.NCBI_TAXONOMY )
ch_versions = ch_versions.mix( PARSE_NCBI_TAXONOMY.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_NCBI_TAXONOMY.out.file_path )

PARSE_GOAT_ASSEMBLY ( ch_input.GOAT_ASSEMBLY)
ch_versions = ch_versions.mix( PARSE_GOAT_ASSEMBLY.out.versions.first() )
ch_combined = ch_combined.concat( PARSE_GOAT_ASSEMBLY.out.file_path )
COMBINE_METADATA(ch_parsed_files)
ch_versions = ch_versions.mix( COMBINE_METADATA.out.versions.first() )

ch_combined = ch_combined.collect(flat: false)
ch_combined_params = ch_combined_params.concat(ch_combined).collect(flat: false)

COMBINE_METADATA(ch_combined_params)
ch_versions = ch_versions.mix( COMBINE_METADATA.out.versions.first() )

COMBINE_METADATA.out.consistent
.multiMap { it ->
| multiMap { it ->
TEMPLATE: it
DB: it
}
.set { ch_params_consistent }
| set { ch_params_consistent }

POPULATE_TEMPLATE( ch_params_consistent.TEMPLATE, ch_note_template )
ch_versions = ch_versions.mix( POPULATE_TEMPLATE.out.versions.first() )
@@ -120,4 +81,5 @@
emit:
template = POPULATE_TEMPLATE.out.genome_note // channel: [ docx ]
versions = ch_versions.ifEmpty(null) // channel: [versions.yml]

}
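The refactor hinges on two channel reshapes in this subworkflow: the splitCsv/map chain tags every download with a meta map (id, taxon_id, source, type, ext) so PARSE_METADATA can pick the right parser, and the map/collect/map chain folds all parsed CSVs into a single [ meta, [ files ] ] tuple for COMBINE_METADATA. A self-contained Nextflow sketch of the second reshape, using placeholder values in place of real accessions and staged files:

    // Minimal demonstration of the ch_parsed_files reshape: several
    // [ meta, csv ] tuples become one [ meta, [ csv, csv, ... ] ] tuple,
    // matching COMBINE_METADATA's tuple val(meta), path(file_list) input.
    nextflow.enable.dsl = 2

    params.assembly = 'ASSEMBLY_ACCESSION'   // placeholder accession
    params.taxon_id = 'TAXONOMY_ID'          // placeholder taxon id

    workflow {
        Channel.of(
            [ [ source: 'ENA',  type: 'Assembly' ], 'ena_assembly.csv'  ],
            [ [ source: 'NCBI', type: 'Taxonomy' ], 'ncbi_taxonomy.csv' ],
            [ [ source: 'GOAT', type: 'Assembly' ], 'goat_assembly.csv' ]
        )
        | map { it -> tuple( it[1] ) }   // keep only the parsed CSV
        | collect                        // gather every CSV into one list
        | map { it -> [ [ id: params.assembly, taxon_id: params.taxon_id ], it ] }
        | view                           // [[id:..., taxon_id:...], [ena_assembly.csv, ...]]
    }

In the real subworkflow the CSV strings are the path outputs of PARSE_METADATA, so COMBINE_METADATA receives staged files rather than bare names.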
