From dddd4acedd32f63c376e12acfe8bcfbf902ec432 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 18 Oct 2023 15:57:15 -0700 Subject: [PATCH 1/3] phylogenetic: Allow config to define `custom_rules` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the same method used in ncov¹ and seasonal-flu² for allowing users to add custom rules to the Snakemake workflow. This will be used in subsequent commits to add custom rules for the Nextstrain CI workflow that are not required to run the core phylogenetic workflow. ¹ https://github.com/nextstrain/ncov/blob/a29a3b2d98b0835444a88c819925fae07ac9b826/Snakefile#L175-L177 ² https://github.com/nextstrain/seasonal-flu/blob/06865c26a2d972252fde15735d001eaec261126f/Snakefile#L40-L42 --- phylogenetic/Snakefile | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index f0927a5e..285012c9 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -57,6 +57,13 @@ if config.get("deploy_url", False): include: "workflow/snakemake_rules/nextstrain_automation.smk" +# Include custom rules defined in the config. +if "custom_rules" in config: + for rule_file in config["custom_rules"]: + + include: rule_file + + rule clean: """ Removing directories: {params} From 5d6f898167322297de70ea06a01dcfcd3e010e8f Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 18 Oct 2023 16:04:39 -0700 Subject: [PATCH 2/3] phylogenetic: Add profile for CI example build Adds custom rule to copy over the example data to be able to run the build with example data. This allows users to run the example build without having to manually copy over the data. 
--- phylogenetic/README.md | 14 ++++ phylogenetic/profiles/ci/builds.yaml | 83 +++++++++++++++++++ .../profiles/ci/copy_example_data.smk | 15 ++++ 3 files changed, 112 insertions(+) create mode 100644 phylogenetic/profiles/ci/builds.yaml create mode 100644 phylogenetic/profiles/ci/copy_example_data.smk diff --git a/phylogenetic/README.md b/phylogenetic/README.md index 6a4b5844..4e5c2d80 100644 --- a/phylogenetic/README.md +++ b/phylogenetic/README.md @@ -9,6 +9,20 @@ Follow the [standard installation instructions](https://docs.nextstrain.org/en/l ## Usage +### Example build + +You can run an example build using the example data provided in this repository via: + +``` +nextstrain build . --configfile profiles/ci/builds.yaml +``` + +When the build has finished running, view the output Auspice trees via: + +``` +nextstrain view . +``` + ### Provision input data Input sequences and metadata can be retrieved from data.nextstrain.org diff --git a/phylogenetic/profiles/ci/builds.yaml b/phylogenetic/profiles/ci/builds.yaml new file mode 100644 index 00000000..8ab51370 --- /dev/null +++ b/phylogenetic/profiles/ci/builds.yaml @@ -0,0 +1,83 @@ +custom_rules: + - profiles/ci/copy_example_data.smk + +reference: "config/reference.fasta" +genemap: "config/genemap.gff" +genbank_reference: "config/reference.gb" +include: "config/hmpxv1/include.txt" +clades: "config/clades.tsv" +lat_longs: "config/lat_longs.tsv" +auspice_config: "config/hmpxv1/auspice_config.json" +description: "config/description.md" +tree_mask: "config/tree_mask.tsv" + +# Use `accession` as the ID column since `strain` currently contains duplicates¹. 
+# ¹ https://github.com/nextstrain/monkeypox/issues/33 +strain_id_field: "accession" +display_strain_field: "strain" + +build_name: "hmpxv1" +auspice_name: "mpox_clade-IIb" + +filter: + exclude: "config/exclude_accessions.txt" + min_date: 2017 + min_length: 100000 + + +### Set 1: Non-B.1 sequences: use all +### Set 2: B.1 sequences: small sample across year/country, maybe month +subsample: + non_b1: + group_by: "--group-by lineage year country" + sequences_per_group: "--sequences-per-group 50" + other_filters: "outbreak!=hMPXV-1 clade!=IIb" + exclude_lineages: + - B.1 + - B.1.1 + - B.1.2 + - B.1.3 + - B.1.4 + - B.1.5 + - B.1.6 + - B.1.7 + - B.1.8 + - B.1.9 + - B.1.10 + - B.1.11 + - B.1.12 + - B.1.13 + - B.1.14 + - B.1.15 + - B.1.16 + - B.1.17 + - B.1.18 + - B.1.19 + - B.1.20 + - C.1 + b1: + group_by: "--group-by country year" + sequences_per_group: "--subsample-max-sequences 100" + other_filters: "--exclude-where outbreak!=hMPXV-1 clade!=IIb" + +## align +max_indel: 10000 +seed_spacing: 1000 + +## treefix +fix_tree: true +treefix_root: "--root MK783032" + +## refine +timetree: true +root: "MK783032 MK783030" +clock_rate: 5.7e-5 +clock_std_dev: 2e-5 + +## recency +recency: true + +mask: + from_beginning: 800 + from_end: 6422 + maskfile: "config/mask.bed" diff --git a/phylogenetic/profiles/ci/copy_example_data.smk b/phylogenetic/profiles/ci/copy_example_data.smk new file mode 100644 index 00000000..447240f3 --- /dev/null +++ b/phylogenetic/profiles/ci/copy_example_data.smk @@ -0,0 +1,15 @@ +rule copy_example_data: + input: + sequences="example_data/sequences.fasta", + metadata="example_data/metadata.tsv", + output: + sequences="data/sequences.fasta", + metadata="data/metadata.tsv", + shell: + """ + cp -f {input.sequences} {output.sequences} + cp -f {input.metadata} {output.metadata} + """ + + +ruleorder: copy_example_data > decompress From b9b677f7066c88f3d15345385322844381fe5cf6 Mon Sep 17 00:00:00 2001 From: Jover Lee Date: Wed, 18 Oct 2023 16:07:44 -0700 Subject: 
[PATCH 3/3] CI: Use `pathogen-repo-build` workflow Since the `pathogen-repo-ci` workflow does not allow for custom build directories, use the `pathogen-repo-build` workflow instead. --- .github/workflows/ci.yaml | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4c99e29b..f4b3e601 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,8 +9,23 @@ on: jobs: pathogen-ci: - # Temporarily use this ref until the commit is merged into the main repo - # PR: https://github.com/nextstrain/.github/pull/62 (opened on 2023-09-27) - uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@4f3074183d6ab612faed84444eefe08a5ded0c69 + strategy: + matrix: + runtime: [docker, conda] + permissions: + id-token: write + uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master + secrets: inherit with: - build-dir: phylogenetic + runtime: ${{ matrix.runtime }} + run: | + nextstrain build \ + phylogenetic \ + --configfile profiles/ci/builds.yaml + artifact-name: output-${{ matrix.runtime }} + artifact-paths: | + phylogenetic/auspice/ + phylogenetic/results/ + phylogenetic/benchmarks/ + phylogenetic/logs/ + phylogenetic/.snakemake/log/