Skip to content

Commit

Permalink
review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
priyanka-surana committed Sep 27, 2023
1 parent f09fa15 commit 021565a
Show file tree
Hide file tree
Showing 12 changed files with 80 additions and 55 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ mMelMel1,illumina,GCA_922984935.2.illumina.mMelMel1.cram
mMelMel3,ont,GCA_922984935.2.ont.mMelMel3.cram
```

Each row represents an aligned file. Rows with the same sample identifier are considered technical replicates. The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (ont, hic, pacbio, illumina). The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline.
Each row represents an aligned file. Rows with the same sample identifier are considered technical replicates. The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (ont, hic, pacbio, pacbio_clr, illumina). The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline.

Now, you can run the pipeline using:

Expand Down
2 changes: 1 addition & 1 deletion assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"datatype": {
"type": "string",
"pattern": "^\\S+$",
"enum": ["hic", "illumina", "ont", "pacbio"],
"enum": ["hic", "illumina", "ont", "pacbio", "pacbio_clr"],
"errorMessage": "Data type, and must be one of: 'hic' or 'illumina' or 'ont' or 'pacbio' or 'pacbio_clr'"
},
"datafile": {
Expand Down
1 change: 1 addition & 0 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class RowChecker:
"hic",
"illumina",
"pacbio",
"pacbio_clr",
"ont",
)

Expand Down
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ process {
}

withName: "GOAT_TAXONSEARCH" {
ext.args = "-l -b"
ext.args = "--lineage --busco"
}

withName: "SAMTOOLS_VIEW" {
Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'
config_profile_description = 'Minimal aligned test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ params {
input = "${projectDir}/assets/test_full/full_samplesheet.csv"

// Fasta references
fasta = "/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/assembly/release/gfLaeSulp1.1/insdc/GCA_927399515.1.fasta.gz"
fasta = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/assembly/release/gfLaeSulp1.1/insdc/GCA_927399515.1.fasta.gz"
accession = "GCA_927399515.1"
taxon = "Laetiporus sulphureus"

Expand Down
11 changes: 6 additions & 5 deletions conf/test_raw.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
*/

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'
config_profile_name = 'Raw test profile'
config_profile_description = 'Minimal raw test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
Expand All @@ -23,14 +23,15 @@ params {
// Specify the paths to your test data
// Give any required params for the test so that command line flags are not needed
input = "${projectDir}/assets/test/samplesheet_raw.csv"
align = true

// Fasta references
fasta = "/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz"
fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz"
accession = "GCA_922984935.2"
taxon = "Meles meles"

// Databases
taxdump = "/lustre/scratch123/tol/teams/grit/geval_pipeline/btk_databases/taxdump"
busco = "/lustre/scratch123/tol/resources/nextflow/busco_2021_06_reduced/"
uniprot = "${projectDir}/assets/test/mCerEla1.1.buscogenes.dmnd"
busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03"
uniprot = "https://tolit.cog.sanger.ac.uk/test-data/resources/diamond/mCerEla1.1.buscogenes.dmnd"
}
2 changes: 1 addition & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ sample3,ont,ont.cram
| Column | Description |
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). |
| `datatype` | Type of sequencing data. Must be one of `hic`, `illumina`, `pacbio`, or `ont`. |
| `datatype` | Type of sequencing data. Must be one of `hic`, `illumina`, `pacbio`, `pacbio_clr`, or `ont`.                                                                                         |
| `datafile` | Full path to read data file. |

An [example samplesheet](https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/test/samplesheet.csv) has been provided with the pipeline.
Expand Down
50 changes: 37 additions & 13 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,63 +8,87 @@
"busco": {
"branch": "master",
"git_sha": "6d6552cb582f56b6101c452e16ee7c23073f91de",
"installed_by": ["modules"],
"installed_by": [
"modules"
],
"patch": "modules/nf-core/busco/busco.diff"
},
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"diamond/blastp": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"fastawindows": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"goat/taxonsearch": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"gunzip": {
"branch": "master",
"git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"minimap2/align": {
"branch": "master",
"git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"mosdepth": {
"branch": "master",
"git_sha": "ebb27711cd5f4de921244bfa81c676504072d31c",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"multiqc": {
"branch": "master",
"git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/fasta": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/index": {
"branch": "master",
"git_sha": "911696ea0b62df80e900ef244d7867d177971f73",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
},
"samtools/view": {
"branch": "master",
"git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f",
"installed_by": ["modules"]
"installed_by": [
"modules"
]
}
}
},
Expand All @@ -73,4 +97,4 @@
}
}
}
}
}
45 changes: 27 additions & 18 deletions subworkflows/local/coverage_stats.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
// Calculate genome coverage and statistics
//

include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { MOSDEPTH } from '../../modules/nf-core/mosdepth/main'
include { FASTAWINDOWS } from '../../modules/nf-core/fastawindows/main'
include { CREATE_BED } from '../../modules/local/create_bed'
include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
include { MOSDEPTH } from '../../modules/nf-core/mosdepth/main'
include { FASTAWINDOWS } from '../../modules/nf-core/fastawindows/main'
include { CREATE_BED } from '../../modules/local/create_bed'


workflow COVERAGE_STATS {
take:
input // channel: [ val(meta), path(aligned) or path(aligned), path(index) ]
input // channel: [ val(meta), path(aln) ]
fasta // channel: [ val(meta), path(fasta) ]


Expand All @@ -19,24 +20,32 @@ workflow COVERAGE_STATS {


// Create aligned BAM and index CSI channel
if (params.align) {

ch_bam_csi = input
input
| branch { meta, aln ->
bam : aln.toString().endsWith("bam") == true
return [ meta, aln ]
cram : aln.toString().endsWith("cram") == true
return [ meta, aln, [] ]
}
| set { ch_aln_idx}

} else {
SAMTOOLS_VIEW ( ch_aln_idx.cram, fasta, [] )
ch_versions = ch_versions.mix ( SAMTOOLS_VIEW.out.versions.first() )

input
| map { meta, cram -> [ meta, cram, [] ] }
| set { ch_cram_crai}
SAMTOOLS_VIEW.out.bam
| join ( SAMTOOLS_VIEW.out.csi )
| set { ch_view }

SAMTOOLS_VIEW ( ch_cram_crai, fasta, [] )
ch_versions = ch_versions.mix ( SAMTOOLS_VIEW.out.versions.first() )
SAMTOOLS_INDEX ( ch_aln_idx.bam )
ch_versions = ch_versions.mix ( SAMTOOLS_INDEX.out.versions.first() )

SAMTOOLS_VIEW.out.bam
| join ( SAMTOOLS_VIEW.out.csi )
| set { ch_bam_csi }
ch_aln_idx.bam
| join ( SAMTOOLS_INDEX.out.csi )
| set { ch_index }

}
ch_view
| mix ( ch_index )
| set { ch_bam_csi }


// Calculate genome statistics
Expand Down
14 changes: 2 additions & 12 deletions subworkflows/local/minimap_alignment.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ include { MINIMAP2_ALIGN as MINIMAP2_ILMN } from '../../modules/nf-core/minimap2
include { MINIMAP2_ALIGN as MINIMAP2_CCS } from '../../modules/nf-core/minimap2/align/main'
include { MINIMAP2_ALIGN as MINIMAP2_CLR } from '../../modules/nf-core/minimap2/align/main'
include { MINIMAP2_ALIGN as MINIMAP2_ONT } from '../../modules/nf-core/minimap2/align/main'
include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'


workflow MINIMAP2_ALIGNMENT {
Expand Down Expand Up @@ -60,7 +59,7 @@ workflow MINIMAP2_ALIGNMENT {
ch_versions = ch_versions.mix(MINIMAP2_ONT.out.versions.first())


// Index aligned reads
// Combine aligned reads
Channel.empty()
| mix ( MINIMAP2_HIC.out.bam )
| mix ( MINIMAP2_ILMN.out.bam )
Expand All @@ -69,17 +68,8 @@ workflow MINIMAP2_ALIGNMENT {
| mix ( MINIMAP2_ONT.out.bam )
| set { ch_aligned }

SAMTOOLS_INDEX ( ch_aligned )
ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first())


// Combine aligned reads and indices
ch_aligned
| join ( SAMTOOLS_INDEX.out.csi )
| set { bam_csi }


emit:
bam_csi // channel: [ val(meta), bam, csi ]
aln = ch_aligned // channel: [ val(meta), bam ]
versions = ch_versions // channel: [ versions.yml ]
}
2 changes: 1 addition & 1 deletion workflows/blobtoolkit.nf
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ workflow BLOBTOOLKIT {
if ( params.align ) {
MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.aln, ch_genome )
ch_versions = ch_versions.mix ( MINIMAP2_ALIGNMENT.out.versions )
ch_aligned = MINIMAP2_ALIGNMENT.out.bam_csi
ch_aligned = MINIMAP2_ALIGNMENT.out.aln
} else {
ch_aligned = INPUT_CHECK.out.aln
}
Expand Down

0 comments on commit 021565a

Please sign in to comment.