diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 4c99e29b..f4b3e601 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -9,8 +9,24 @@ on:
 
 jobs:
   pathogen-ci:
-    # Temporarily use this ref until the commit is merged into the main repo
-    # PR: https://github.com/nextstrain/.github/pull/62 (opened on 2023-09-27)
-    uses: nextstrain/.github/.github/workflows/pathogen-repo-ci.yaml@4f3074183d6ab612faed84444eefe08a5ded0c69
+    # Run the example build once for each runtime listed in the matrix.
+    strategy:
+      matrix:
+        runtime: [docker, conda]
+    permissions:
+      id-token: write
+    uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
+    secrets: inherit
     with:
-      build-dir: phylogenetic
+      runtime: ${{ matrix.runtime }}
+      run: |
+        nextstrain build \
+          phylogenetic \
+          --configfile profiles/ci/builds.yaml
+      artifact-name: output-${{ matrix.runtime }}
+      artifact-paths: |
+        phylogenetic/auspice/
+        phylogenetic/results/
+        phylogenetic/benchmarks/
+        phylogenetic/logs/
+        phylogenetic/.snakemake/log/
diff --git a/phylogenetic/README.md b/phylogenetic/README.md
index 6a4b5844..4e5c2d80 100644
--- a/phylogenetic/README.md
+++ b/phylogenetic/README.md
@@ -9,6 +9,20 @@ Follow the [standard installation instructions](https://docs.nextstrain.org/en/l
 
 ## Usage
 
+### Example build
+
+You can run an example build using the example data provided in this repository via:
+
+```
+nextstrain build . --configfile profiles/ci/builds.yaml
+```
+
+When the build has finished running, view the output Auspice trees via:
+
+```
+nextstrain view .
+```
+
 ### Provision input data
 
 Input sequences and metadata can be retrieved from data.nextstrain.org
diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile
index f0927a5e..285012c9 100644
--- a/phylogenetic/Snakefile
+++ b/phylogenetic/Snakefile
@@ -57,6 +57,13 @@ if config.get("deploy_url", False):
     include: "workflow/snakemake_rules/nextstrain_automation.smk"
 
 
+# Include custom rules defined in the config.
+if "custom_rules" in config:
+    for rule_file in config["custom_rules"]:
+
+        include: rule_file
+
+
 rule clean:
     """
     Removing directories: {params}
diff --git a/phylogenetic/profiles/ci/builds.yaml b/phylogenetic/profiles/ci/builds.yaml
new file mode 100644
index 00000000..8ab51370
--- /dev/null
+++ b/phylogenetic/profiles/ci/builds.yaml
@@ -0,0 +1,89 @@
+# Configuration for the CI / example build; the README and the CI workflow run
+# it with `--configfile profiles/ci/builds.yaml`.
+
+# Extra Snakemake rule files that the Snakefile includes when this key is set.
+custom_rules:
+  - profiles/ci/copy_example_data.smk
+
+reference: "config/reference.fasta"
+genemap: "config/genemap.gff"
+genbank_reference: "config/reference.gb"
+include: "config/hmpxv1/include.txt"
+clades: "config/clades.tsv"
+lat_longs: "config/lat_longs.tsv"
+auspice_config: "config/hmpxv1/auspice_config.json"
+description: "config/description.md"
+tree_mask: "config/tree_mask.tsv"
+
+# Use `accession` as the ID column since `strain` currently contains duplicates¹.
+# ¹ https://github.com/nextstrain/monkeypox/issues/33
+strain_id_field: "accession"
+display_strain_field: "strain"
+
+build_name: "hmpxv1"
+auspice_name: "mpox_clade-IIb"
+
+filter:
+  exclude: "config/exclude_accessions.txt"
+  min_date: 2017
+  min_length: 100000
+
+
+### Set 1: Non-B.1 sequences: use all
+### Set 2: B.1 sequences: small sample across year/country, maybe month
+subsample:
+  non_b1:
+    group_by: "--group-by lineage year country"
+    sequences_per_group: "--sequences-per-group 50"
+    other_filters: "outbreak!=hMPXV-1 clade!=IIb"
+    exclude_lineages:
+      - B.1
+      - B.1.1
+      - B.1.2
+      - B.1.3
+      - B.1.4
+      - B.1.5
+      - B.1.6
+      - B.1.7
+      - B.1.8
+      - B.1.9
+      - B.1.10
+      - B.1.11
+      - B.1.12
+      - B.1.13
+      - B.1.14
+      - B.1.15
+      - B.1.16
+      - B.1.17
+      - B.1.18
+      - B.1.19
+      - B.1.20
+      - C.1
+  b1:
+    group_by: "--group-by country year"
+    sequences_per_group: "--subsample-max-sequences 100"
+    other_filters: "--exclude-where outbreak!=hMPXV-1 clade!=IIb"
+
+## align
+max_indel: 10000
+seed_spacing: 1000
+
+## treefix
+fix_tree: true
+treefix_root: "--root MK783032"
+
+## refine
+timetree: true
+root: "MK783032 MK783030"
+clock_rate: 5.7e-5
+# Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML) read it
+# as a float like `clock_rate`; a bare `2e-5` is loaded as a string.
+clock_std_dev: 2.0e-5
+
+## recency
+recency: true
+
+mask:
+  from_beginning: 800
+  from_end: 6422
+  maskfile: "config/mask.bed"
diff --git a/phylogenetic/profiles/ci/copy_example_data.smk b/phylogenetic/profiles/ci/copy_example_data.smk
new file mode 100644
index 00000000..447240f3
--- /dev/null
+++ b/phylogenetic/profiles/ci/copy_example_data.smk
@@ -0,0 +1,18 @@
+# Copy the example sequences and metadata from `example_data/` into `data/`.
+rule copy_example_data:
+    input:
+        sequences="example_data/sequences.fasta",
+        metadata="example_data/metadata.tsv",
+    output:
+        sequences="data/sequences.fasta",
+        metadata="data/metadata.tsv",
+    shell:
+        """
+        cp -f {input.sequences} {output.sequences}
+        cp -f {input.metadata} {output.metadata}
+        """
+
+
+# Give this rule priority over `decompress` (defined outside this file) when
+# Snakemake considers both able to produce the same output.
+ruleorder: copy_example_data > decompress