From f2f3581d0f6ccfcd5475d2b74dff8b8c7c9631ac Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Wed, 28 Feb 2024 14:53:34 -0800 Subject: [PATCH 1/2] Standardize auxiliary input files in a "defaults" directory This commit renames the `config` directory to `defaults` in both the ingest and phylogenetic workflows. The `defaults.yaml` for configuration values is also renamed to `config.yaml` to be consistent with the pathogen-repo-guide. https://github.com/nextstrain/pathogen-repo-guide/tree/main/ingest/defaults --- ingest/Snakefile | 2 +- ingest/{config => defaults}/annotations.tsv | 0 ingest/{config/defaults.yaml => defaults/config.yaml} | 4 ++-- ingest/{config => defaults}/geolocation-rules.tsv | 0 ingest/{config => defaults}/optional.yaml | 0 phylogenetic/Snakefile | 2 +- phylogenetic/{config => defaults}/auspice_config.json | 0 phylogenetic/{config => defaults}/colors.tsv | 0 phylogenetic/{config => defaults}/config_zika.yaml | 0 phylogenetic/{config => defaults}/description.md | 0 phylogenetic/{config => defaults}/dropped_strains.txt | 0 phylogenetic/{config => defaults}/zika_reference.gb | 0 phylogenetic/rules/annotate_phylogeny.smk | 2 +- phylogenetic/rules/export.smk | 6 +++--- phylogenetic/rules/prepare_sequences.smk | 4 ++-- 15 files changed, 10 insertions(+), 10 deletions(-) rename ingest/{config => defaults}/annotations.tsv (100%) rename ingest/{config/defaults.yaml => defaults/config.yaml} (97%) rename ingest/{config => defaults}/geolocation-rules.tsv (100%) rename ingest/{config => defaults}/optional.yaml (100%) rename phylogenetic/{config => defaults}/auspice_config.json (100%) rename phylogenetic/{config => defaults}/colors.tsv (100%) rename phylogenetic/{config => defaults}/config_zika.yaml (100%) rename phylogenetic/{config => defaults}/description.md (100%) rename phylogenetic/{config => defaults}/dropped_strains.txt (100%) rename phylogenetic/{config => defaults}/zika_reference.gb (100%) diff --git a/ingest/Snakefile b/ingest/Snakefile index 
a99870a..59431de 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -5,7 +5,7 @@ min_version( ) # Snakemake 7.7.0 introduced `retries` directive used in fetch-sequences # Use default configuration values. Override with Snakemake's --configfile/--config options. -configfile: "config/defaults.yaml" +configfile: "defaults/config.yaml" send_slack_notifications = config.get("send_slack_notifications", False) diff --git a/ingest/config/annotations.tsv b/ingest/defaults/annotations.tsv similarity index 100% rename from ingest/config/annotations.tsv rename to ingest/defaults/annotations.tsv diff --git a/ingest/config/defaults.yaml b/ingest/defaults/config.yaml similarity index 97% rename from ingest/config/defaults.yaml rename to ingest/defaults/config.yaml index ae46026..2da4e63 100644 --- a/ingest/config/defaults.yaml +++ b/ingest/defaults/config.yaml @@ -40,7 +40,7 @@ curate: geolocation_rules_url: 'https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv' # The path to the local geolocation rules within the pathogen repo # The path should be relative to the ingest directory. - local_geolocation_rules: 'config/geolocation-rules.tsv' + local_geolocation_rules: 'defaults/geolocation-rules.tsv' # List of field names to change where the key is the original field name and the value is the new field name # The original field names should match the ncbi_datasets_fields provided above. # This is the first step in the pipeline, so any references to field names in the configs below should use the new field names @@ -85,7 +85,7 @@ curate: authors_default_value: '?' 
# Path to the manual annotations file # The path should be relative to the ingest directory - annotations: "config/annotations.tsv" + annotations: "defaults/annotations.tsv" # The ID field in the metadata to use to merge the manual annotations annotations_id: 'genbank_accession' # The ID field in the metadata to use as the sequence id in the output FASTA file diff --git a/ingest/config/geolocation-rules.tsv b/ingest/defaults/geolocation-rules.tsv similarity index 100% rename from ingest/config/geolocation-rules.tsv rename to ingest/defaults/geolocation-rules.tsv diff --git a/ingest/config/optional.yaml b/ingest/defaults/optional.yaml similarity index 100% rename from ingest/config/optional.yaml rename to ingest/defaults/optional.yaml diff --git a/phylogenetic/Snakefile b/phylogenetic/Snakefile index 3612286..d1697fd 100644 --- a/phylogenetic/Snakefile +++ b/phylogenetic/Snakefile @@ -1,4 +1,4 @@ -configfile: "config/config_zika.yaml" +configfile: "defaults/config_zika.yaml" rule all: input: diff --git a/phylogenetic/config/auspice_config.json b/phylogenetic/defaults/auspice_config.json similarity index 100% rename from phylogenetic/config/auspice_config.json rename to phylogenetic/defaults/auspice_config.json diff --git a/phylogenetic/config/colors.tsv b/phylogenetic/defaults/colors.tsv similarity index 100% rename from phylogenetic/config/colors.tsv rename to phylogenetic/defaults/colors.tsv diff --git a/phylogenetic/config/config_zika.yaml b/phylogenetic/defaults/config_zika.yaml similarity index 100% rename from phylogenetic/config/config_zika.yaml rename to phylogenetic/defaults/config_zika.yaml diff --git a/phylogenetic/config/description.md b/phylogenetic/defaults/description.md similarity index 100% rename from phylogenetic/config/description.md rename to phylogenetic/defaults/description.md diff --git a/phylogenetic/config/dropped_strains.txt b/phylogenetic/defaults/dropped_strains.txt similarity index 100% rename from 
phylogenetic/config/dropped_strains.txt rename to phylogenetic/defaults/dropped_strains.txt diff --git a/phylogenetic/config/zika_reference.gb b/phylogenetic/defaults/zika_reference.gb similarity index 100% rename from phylogenetic/config/zika_reference.gb rename to phylogenetic/defaults/zika_reference.gb diff --git a/phylogenetic/rules/annotate_phylogeny.smk b/phylogenetic/rules/annotate_phylogeny.smk index ad00d0f..0d2fee6 100644 --- a/phylogenetic/rules/annotate_phylogeny.smk +++ b/phylogenetic/rules/annotate_phylogeny.smk @@ -54,7 +54,7 @@ rule translate: input: tree = "results/tree.nwk", node_data = "results/nt_muts.json", - reference = "config/zika_reference.gb" + reference = "defaults/zika_reference.gb" output: node_data = "results/aa_muts.json" shell: diff --git a/phylogenetic/rules/export.smk b/phylogenetic/rules/export.smk index e44b8d5..4b2af55 100644 --- a/phylogenetic/rules/export.smk +++ b/phylogenetic/rules/export.smk @@ -34,9 +34,9 @@ rule export: traits = "results/traits.json", nt_muts = "results/nt_muts.json", aa_muts = "results/aa_muts.json", - colors = "config/colors.tsv", - auspice_config = "config/auspice_config.json", - description = "config/description.md" + colors = "defaults/colors.tsv", + auspice_config = "defaults/auspice_config.json", + description = "defaults/description.md" output: auspice_json = "results/raw_zika.json", root_sequence = "results/raw_zika_root-sequence.json", diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 2ef99b7..96c62f4 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -60,7 +60,7 @@ rule filter: input: sequences = "data/sequences_all.fasta", metadata = "data/metadata_all.tsv", - exclude = "config/dropped_strains.txt", + exclude = "defaults/dropped_strains.txt", output: sequences = "results/filtered.fasta" params: @@ -90,7 +90,7 @@ rule align: """ input: sequences = "results/filtered.fasta", - reference = 
"config/zika_reference.gb" + reference = "defaults/zika_reference.gb" output: alignment = "results/aligned.fasta" shell: From 1f22128193896dad0af556209efc1ef75f0020a2 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Wed, 28 Feb 2024 15:08:13 -0800 Subject: [PATCH 2/2] For CI config files, we will use the directory "build-configs" instead of "profiles". This is consistent with the pathogen-repo-guide. https://github.com/nextstrain/pathogen-repo-guide/tree/5509de0e315eb6acae513d2df4dfab874ff289b5/phylogenetic/build-configs/ci However, note that the `ingest/profiles` does not need to be renamed because it's actually using the Snakemake Profiles feature. https://github.com/snakemake-profiles/doc --- .github/workflows/ci.yaml | 2 +- .../{profiles => build-configs}/ci/copy_example_data.smk | 0 phylogenetic/build-configs/ci/profiles_config.yaml | 2 ++ phylogenetic/profiles/ci/profiles_config.yaml | 2 -- 4 files changed, 3 insertions(+), 3 deletions(-) rename phylogenetic/{profiles => build-configs}/ci/copy_example_data.smk (100%) create mode 100644 phylogenetic/build-configs/ci/profiles_config.yaml delete mode 100644 phylogenetic/profiles/ci/profiles_config.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 12ce718..1598935 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -18,7 +18,7 @@ jobs: run: | nextstrain build \ phylogenetic \ - --configfile profiles/ci/profiles_config.yaml + --configfile build-configs/ci/profiles_config.yaml artifact-name: output-${{ matrix.runtime }} artifact-paths: | phylogenetic/auspice/ diff --git a/phylogenetic/profiles/ci/copy_example_data.smk b/phylogenetic/build-configs/ci/copy_example_data.smk similarity index 100% rename from phylogenetic/profiles/ci/copy_example_data.smk rename to phylogenetic/build-configs/ci/copy_example_data.smk diff --git a/phylogenetic/build-configs/ci/profiles_config.yaml b/phylogenetic/build-configs/ci/profiles_config.yaml new file mode 100644 index 
0000000..ab6a2cb --- /dev/null +++ b/phylogenetic/build-configs/ci/profiles_config.yaml @@ -0,0 +1,2 @@ +custom_rules: + - build-configs/ci/copy_example_data.smk diff --git a/phylogenetic/profiles/ci/profiles_config.yaml b/phylogenetic/profiles/ci/profiles_config.yaml deleted file mode 100644 index 17bad21..0000000 --- a/phylogenetic/profiles/ci/profiles_config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -custom_rules: - - profiles/ci/copy_example_data.smk