nextstrain · joverlee521 · Nov 7, 2023 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -9,8 +9,23 @@ on:
 
 jobs:
   pathogen-ci:
-    # Temporarily use this ref until the commit is merged into the main repo
-    # PR: https://github.com/nextstrain/.github/pull/62 (opened on 2023-09-27)
-    uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@4f3074183d6ab612faed84444eefe08a5ded0c69
+    strategy:
+      matrix:
+        runtime: [docker, conda]  # one job per listed runtime
+    permissions:
+      id-token: write  # NOTE(review): presumably needed by the reusable workflow for OIDC tokens; confirm
+    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
+    secrets: inherit  # hand the caller's secrets to the reusable workflow
     with:
-      build-dir: phylogenetic
+      runtime: ${{ matrix.runtime }}
+      run: |
+        nextstrain build \
+          phylogenetic \
+          --configfile profiles/ci/builds.yaml
-      run: |
-        nextstrain build \
-          phylogenetic \
-          --configfile profiles/ci/builds.yaml
+      run: |
+        mkdir -p phylogenetic/data;
+        cp -r -v phylogenetic/example_data/* phylogenetic/data/;
+        nextstrain build phylogenetic
-      run: |
-        nextstrain build \
-          phylogenetic \
-          --configfile profiles/ci/builds.yaml
+      run: |
+        mkdir -p phylogenetic/data;
+        cp -r -v phylogenetic/example_data/* phylogenetic/data/;
+        nextstrain build phylogenetic
+      artifact-name: output-${{ matrix.runtime }}  # runtime suffix keeps each matrix job's artifact name distinct
+      artifact-paths: |
+        phylogenetic/auspice/
+        phylogenetic/results/
+        phylogenetic/benchmarks/
+        phylogenetic/logs/
+        phylogenetic/.snakemake/log/
diff --git a/phylogenetic/README.md b/phylogenetic/README.md
@@ -9,6 +9,20 @@ Follow the [standard installation instructions](https://docs.nextstrain.org/en/l
 
 ## Usage
 
+### Example build
+
+You can run an example build using the example data provided in this repository via:
+
+```
+nextstrain build . --configfile profiles/ci/builds.yaml
+```
+
+When the build has finished running, view the output Auspice trees via:
+
+```
+nextstrain view .
+```
+
 ### Provision input data
 
 Input sequences and metadata can be retrieved from data.nextstrain.org

diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
@@ -57,6 +57,13 @@ if config.get("deploy_url", False):
     include: "workflow/snakemake_rules/nextstrain_automation.smk"
 
 
+# Pull in every extra rule file listed under `custom_rules` in the config;
+# when the key is absent there is nothing to include.
+for rule_file in config.get("custom_rules", []):
+
+    include: rule_file
+
+
 rule clean:
     """
     Removing directories: {params}

diff --git a/phylogenetic/profiles/ci/builds.yaml b/phylogenetic/profiles/ci/builds.yaml
@@ -0,0 +1,83 @@
+# Extra Snakemake rule files; the Snakefile includes each listed path in order.
+custom_rules:
+  - profiles/ci/copy_example_data.smk
+
+# Input and config file paths.
+# NOTE(review): relative paths presumably resolve from the phylogenetic/ build directory; confirm.
+reference: "config/reference.fasta"
+genemap: "config/genemap.gff"
+genbank_reference: "config/reference.gb"
+include: "config/hmpxv1/include.txt"
+clades: "config/clades.tsv"
+lat_longs: "config/lat_longs.tsv"
+auspice_config: "config/hmpxv1/auspice_config.json"
+description: "config/description.md"
+tree_mask: "config/tree_mask.tsv"
+
+# Use `accession` as the ID column since `strain` currently contains duplicates¹.
+# ¹ https://github.com/nextstrain/monkeypox/issues/33
+strain_id_field: "accession"
+display_strain_field: "strain"
+
+build_name: "hmpxv1"
+auspice_name: "mpox_clade-IIb"
+
+# NOTE(review): presumably forwarded to the workflow's sequence-filtering step; confirm.
+filter:
+  exclude: "config/exclude_accessions.txt"
+  min_date: 2017  # unquoted, so it loads as an integer
+  min_length: 100000
+
+
+### Set 1: Non-B.1 sequences: use all
+### Set 2: B.1 sequences: small sample across year/country, maybe month
+# NOTE(review): Set 1 says "use all", yet non_b1 below sets a per-group cap of 50; confirm which is intended.
+subsample:
+  non_b1:
+    group_by: "--group-by lineage year country"
+    sequences_per_group: "--sequences-per-group 50"
+    # NOTE(review): no flag prefix here, unlike b1.other_filters; presumably appended
+    # after a flag the workflow builds from exclude_lineages. Confirm against the rule.
+    other_filters: "outbreak!=hMPXV-1 clade!=IIb"
+    exclude_lineages:
+      - B.1
+      - B.1.1
+      - B.1.2
+      - B.1.3
+      - B.1.4
+      - B.1.5
+      - B.1.6
+      - B.1.7
+      - B.1.8
+      - B.1.9
+      - B.1.10
+      - B.1.11
+      - B.1.12
+      - B.1.13
+      - B.1.14
+      - B.1.15
+      - B.1.16
+      - B.1.17
+      - B.1.18
+      - B.1.19
+      - B.1.20
+      - C.1
+  b1:
+    group_by: "--group-by country year"
+    # Despite the key name, this value carries a total-count flag rather than a per-group one.
+    sequences_per_group: "--subsample-max-sequences 100"
+    other_filters: "--exclude-where outbreak!=hMPXV-1 clade!=IIb"
+
+## align
+max_indel: 10000
+seed_spacing: 1000
+
+## treefix
+fix_tree: true
+treefix_root: "--root MK783032"  # value carries its own CLI flag
+
+## refine
+timetree: true
+root: "MK783032 MK783030"  # two space-separated tokens, no flag prefix (unlike treefix_root)
+clock_rate: 5.7e-5
+# Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) read a float,
+# matching clock_rate; a bare `2e-5` would be loaded as the string "2e-5".
+clock_std_dev: 2.0e-5
+
+## recency
+recency: true
+
+mask:
+  from_beginning: 800
+  from_end: 6422
+  maskfile: "config/mask.bed"
diff --git a/phylogenetic/profiles/ci/copy_example_data.smk b/phylogenetic/profiles/ci/copy_example_data.smk
@@ -0,0 +1,15 @@
+# Copies the example sequences and metadata from example_data/ into data/,
+# overwriting any existing copies (`cp -f`).
+rule copy_example_data:
+    input:
+        sequences="example_data/sequences.fasta",
+        metadata="example_data/metadata.tsv",
+    output:
+        sequences="data/sequences.fasta",
+        metadata="data/metadata.tsv",
+    shell:
+        """
+        cp -f {input.sequences} {output.sequences}
+        cp -f {input.metadata} {output.metadata}
+        """
+
+
+# Prefer copy_example_data whenever it and `decompress` could both produce the
+# same output. NOTE(review): `decompress` is defined elsewhere; presumably it
+# also targets files under data/. Confirm.
+ruleorder: copy_example_data > decompress