Merge branch 'nf-core-template-merge-2.5.1' into dev

nf-core · Sep 11, 2022 · b848d02 · b848d02
2 parents a2c013c + 8781190
commit b848d02
Show file tree

Hide file tree

Showing 18 changed files with 225 additions and 108 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js}]
+[*.{md,yml,yaml,html,css,scss,js,cff}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,8 +15,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/gwas
 
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/gwas/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/gwas _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/gwas/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/gwas _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -10,7 +10,6 @@ on:
 
 env:
   NXF_ANSI_LOG: false
-  CAPSULE_LOG: none
 
 jobs:
   test:
@@ -20,27 +19,17 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: "21.10.3"
-            NXF_EDGE: ""
-          # Test latest edge release of Nextflow
-          - NXF_VER: ""
-            NXF_EDGE: "1"
+        NXF_VER:
+          - "21.10.3"
+          - "latest-everything"
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.NXF_VER }}
-          # Uncomment only if the edge release is more recent than the latest stable release
-          # See https://github.com/nextflow-io/nextflow/issues/2467
-          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "${{ matrix.NXF_VER }}"
 
       - name: Run pipeline with test data
         # TODO nf-core: You can customise CI pipeline run tests as required

diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
@@ -35,22 +35,48 @@ jobs:
       - name: Run Prettier --check
         run: prettier --check ${GITHUB_WORKSPACE}
 
+  PythonBlack:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Check code lints with Black
+        uses: psf/black@stable
+
+      # If the above check failed, post a comment on the PR explaining the failure
+      - name: Post PR comment
+        if: failure()
+        uses: mshick/add-pr-comment@v1
+        with:
+          message: |
+            ## Python linting (`black`) is failing
+
+            To keep the code consistent with lots of contributors, we run automated code consistency checks.
+            To fix this CI test, please run:
+
+            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
+            * Fix formatting errors in your pipeline: `black .`
+
+            Once you push these changes the test should pass, and you can hide this comment :+1:
+
+            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+
+            Thanks again for your contribution!
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          allow-repeats: false
+
   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2
 
       - name: Install Nextflow
-        env:
-          CAPSULE_LOG: none
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
 
       - uses: actions/setup-python@v3
         with:
-          python-version: "3.6"
+          python-version: "3.7"
           architecture: "x64"
 
       - name: Install dependencies

diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,56 @@
+cff-version: 1.2.0
+message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication"
+authors:
+  - family-names: Ewels
+    given-names: Philip
+  - family-names: Peltzer
+    given-names: Alexander
+  - family-names: Fillinger
+    given-names: Sven
+  - family-names: Patel
+    given-names: Harshil
+  - family-names: Alneberg
+    given-names: Johannes
+  - family-names: Wilm
+    given-names: Andreas
+  - family-names: Ulysse Garcia
+    given-names: Maxime
+  - family-names: Di Tommaso
+    given-names: Paolo
+  - family-names: Nahnsen
+    given-names: Sven
+title: "The nf-core framework for community-curated bioinformatics pipelines."
+version: 2.4.1
+doi: 10.1038/s41587-020-0439-x
+date-released: 2022-05-16
+url: https://github.com/nf-core/tools
+prefered-citation:
+  type: article
+  authors:
+    - family-names: Ewels
+      given-names: Philip
+    - family-names: Peltzer
+      given-names: Alexander
+    - family-names: Fillinger
+      given-names: Sven
+    - family-names: Patel
+      given-names: Harshil
+    - family-names: Alneberg
+      given-names: Johannes
+    - family-names: Wilm
+      given-names: Andreas
+    - family-names: Ulysse Garcia
+      given-names: Maxime
+    - family-names: Di Tommaso
+      given-names: Paolo
+    - family-names: Nahnsen
+      given-names: Sven
+  doi: 10.1038/s41587-020-0439-x
+  journal: nature biotechnology
+  start: 276
+  end: 278
+  title: "The nf-core framework for community-curated bioinformatics pipelines."
+  issue: 3
+  volume: 38
+  year: 2020
+  url: https://dx.doi.org/10.1038/s41587-020-0439-x
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) Davi Josué Marcon (@Mxrcon), Abhinav Sharma (@abhi18av), Jean-Tristan Brandenburg (@jeantristanb), Lindsay Clark (@lvclark) and Scott Hazelhurst (@shaze)
+Copyright (c) Davi Josué Marcon (@Mxrcon), Abhinav Sharma (@abhi18av), Jean-Tristan Brandenburg (@jeantristanb), Lindsay Clark (@lvclark) and Scott Hazelhurst (@shaze).
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -1,19 +1,14 @@
 # ![nf-core/gwas](docs/images/nf-core-gwas_logo_light.png#gh-light-mode-only) ![nf-core/gwas](docs/images/nf-core-gwas_logo_dark.png#gh-dark-mode-only)
 
-[![GitHub Actions CI Status](https://github.com/nf-core/gwas/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/gwas/actions?query=workflow%3A%22nf-core+CI%22)
-[![GitHub Actions Linting Status](https://github.com/nf-core/gwas/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/gwas/actions?query=workflow%3A%22nf-core+linting%22)
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/gwas/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX)
+[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/gwas/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
 
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
-[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
-[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
-[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
+[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
+[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
+[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
 [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/gwas)
 
-[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23gwas-4A154B?logo=slack)](https://nfcore.slack.com/channels/gwas)
-[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
-[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23gwas-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/gwas)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
@@ -25,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
 
 <!-- TODO nf-core: Add full-sized test dataset and amend the paragraph below if applicable -->
 
-On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/gwas/results).
+On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources.The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/gwas/results).
 
 ## Pipeline summary
 
@@ -42,7 +37,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 3. Download the pipeline and test it on a minimal dataset with a single command:
 
-   ```console
+   ```bash
    nextflow run nf-core/gwas -profile test,YOURPROFILE --outdir <OUTDIR>
    ```
 
@@ -57,7 +52,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
    <!-- TODO nf-core: Update the example "typical command" below used to run the pipeline -->
 
-   ```console
+   ```bash
    nextflow run nf-core/gwas --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
    ```
 

diff --git a/assets/email_template.txt b/assets/email_template.txt
@@ -6,7 +6,6 @@
                                         `._,._,'
   nf-core/gwas v${version}
 ----------------------------------------------------
-
 Run Name: $runName
 
 <% if (success){

diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
@@ -11,7 +11,6 @@
 from collections import Counter
 from pathlib import Path
 
-
 logger = logging.getLogger()
 
 
@@ -79,13 +78,15 @@ def validate_and_transform(self, row):
 
     def _validate_sample(self, row):
         """Assert that the sample name exists and convert spaces to underscores."""
-        assert len(row[self._sample_col]) > 0, "Sample input is required."
+        if len(row[self._sample_col]) <= 0:
+            raise AssertionError("Sample input is required.")
         # Sanitize samples slightly.
         row[self._sample_col] = row[self._sample_col].replace(" ", "_")
 
     def _validate_first(self, row):
         """Assert that the first FASTQ entry is non-empty and has the right format."""
-        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
+        if len(row[self._first_col]) <= 0:
+            raise AssertionError("At least the first FASTQ file is required.")
         self._validate_fastq_format(row[self._first_col])
 
     def _validate_second(self, row):
@@ -97,36 +98,46 @@ def _validate_pair(self, row):
         """Assert that read pairs have the same file extension. Report pair status."""
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
-            assert (
-                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
-            ), "FASTQ pairs must have the same file extensions."
+            first_col_suffix = Path(row[self._first_col]).suffixes[-2:]
+            second_col_suffix = Path(row[self._second_col]).suffixes[-2:]
+            if first_col_suffix != second_col_suffix:
+                raise AssertionError("FASTQ pairs must have the same file extensions.")
         else:
             row[self._single_col] = True
 
     def _validate_fastq_format(self, filename):
         """Assert that a given filename has one of the expected FASTQ extensions."""
-        assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), (
-            f"The FASTQ file has an unrecognized extension: {filename}\n"
-            f"It should be one of: {', '.join(self.VALID_FORMATS)}"
-        )
+        if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
+            raise AssertionError(
+                f"The FASTQ file has an unrecognized extension: {filename}\n"
+                f"It should be one of: {', '.join(self.VALID_FORMATS)}"
+            )
 
     def validate_unique_samples(self):
         """
         Assert that the combination of sample name and FASTQ filename is unique.
 
-        In addition to the validation, also rename the sample if more than one sample,
-        FASTQ file combination exists.
+        In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
+        number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
 
         """
-        assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique."
-        if len({pair[0] for pair in self._seen}) < len(self._seen):
-            counts = Counter(pair[0] for pair in self._seen)
-            seen = Counter()
-            for row in self.modified:
-                sample = row[self._sample_col]
-                seen[sample] += 1
-                if counts[sample] > 1:
-                    row[self._sample_col] = f"{sample}_T{seen[sample]}"
+        if len(self._seen) != len(self.modified):
+            raise AssertionError("The pair of sample name and FASTQ must be unique.")
+        seen = Counter()
+        for row in self.modified:
+            sample = row[self._sample_col]
+            seen[sample] += 1
+            row[self._sample_col] = f"{sample}_T{seen[sample]}"
+
+
+def read_head(handle, num_lines=10):
+    """Read the specified number of lines from the current position in the file."""
+    lines = []
+    for idx, line in enumerate(handle):
+        if idx == num_lines:
+            break
+        lines.append(line)
+    return "".join(lines)
 
 
 def read_head(handle, num_lines=10):
@@ -158,7 +169,7 @@ def sniff_format(handle):
     handle.seek(0)
     sniffer = csv.Sniffer()
     if not sniffer.has_header(peek):
-        logger.critical(f"The given sample sheet does not appear to contain a header.")
+        logger.critical("The given sample sheet does not appear to contain a header.")
         sys.exit(1)
     dialect = sniffer.sniff(peek)
     return dialect
@@ -196,7 +207,8 @@ def check_samplesheet(file_in, file_out):
         reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
         # Validate the existence of the expected header columns.
         if not required_columns.issubset(reader.fieldnames):
-            logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.")
+            req_cols = ", ".join(required_columns)
+            logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.")
             sys.exit(1)
         # Validate each row.
         checker = RowChecker()

diff --git a/conf/base.config b/conf/base.config
@@ -26,6 +26,11 @@ process {
     //        adding in your local modules too.
     // TODO nf-core: Customise requirements for specific processes.
     // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+    withLabel:process_single {
+        cpus   = { check_max( 1                  , 'cpus'    ) }
+        memory = { check_max( 6.GB * task.attempt, 'memory'  ) }
+        time   = { check_max( 4.h  * task.attempt, 'time'    ) }
+    }
     withLabel:process_low {
         cpus   = { check_max( 2     * task.attempt, 'cpus'    ) }
         memory = { check_max( 12.GB * task.attempt, 'memory'  ) }