From 5af15a1c1d4061003d39b7475573ccce3ac6a0e5 Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 24 May 2024 10:40:46 -0700 Subject: [PATCH] Add a configurable includes.txt Adds a configurable include_{serotype}.txt to force-include key strains (e.g. vaccine-related, lineage-defining) for each serotype tree. --- phylogenetic/config/config_dengue.yaml | 1 + phylogenetic/config/include_all.txt | 1 + phylogenetic/config/include_denv1.txt | 1 + phylogenetic/config/include_denv2.txt | 1 + phylogenetic/config/include_denv3.txt | 1 + phylogenetic/config/include_denv4.txt | 1 + phylogenetic/rules/prepare_sequences.smk | 2 ++ 7 files changed, 8 insertions(+) create mode 100644 phylogenetic/config/include_all.txt create mode 100644 phylogenetic/config/include_denv1.txt create mode 100644 phylogenetic/config/include_denv2.txt create mode 100644 phylogenetic/config/include_denv3.txt create mode 100644 phylogenetic/config/include_denv4.txt diff --git a/phylogenetic/config/config_dengue.yaml b/phylogenetic/config/config_dengue.yaml index a110c969..ebd9a318 100644 --- a/phylogenetic/config/config_dengue.yaml +++ b/phylogenetic/config/config_dengue.yaml @@ -9,6 +9,7 @@ display_strain_field: "strain" filter: exclude: "config/exclude.txt" + include: "config/include_{serotype}.txt" group_by: "year region" min_length: genome: 5000 diff --git a/phylogenetic/config/include_all.txt b/phylogenetic/config/include_all.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_all.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv1.txt b/phylogenetic/config/include_denv1.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv1.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv2.txt b/phylogenetic/config/include_denv2.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ 
b/phylogenetic/config/include_denv2.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv3.txt b/phylogenetic/config/include_denv3.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv3.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/config/include_denv4.txt b/phylogenetic/config/include_denv4.txt new file mode 100644 index 00000000..b98b661e --- /dev/null +++ b/phylogenetic/config/include_denv4.txt @@ -0,0 +1 @@ +# Format: [# ] \ No newline at end of file diff --git a/phylogenetic/rules/prepare_sequences.smk b/phylogenetic/rules/prepare_sequences.smk index 7f651b68..60144f85 100644 --- a/phylogenetic/rules/prepare_sequences.smk +++ b/phylogenetic/rules/prepare_sequences.smk @@ -55,6 +55,7 @@ rule filter: sequences = lambda wildcard: "data/sequences_{serotype}.fasta" if wildcard.gene in ['genome'] else "results/{gene}/sequences_{serotype}.fasta", metadata = "data/metadata_{serotype}.tsv", exclude = config["filter"]["exclude"], + include = config["filter"]["include"], output: sequences = "results/{gene}/filtered_{serotype}.fasta" params: @@ -69,6 +70,7 @@ rule filter: --metadata {input.metadata} \ --metadata-id-columns {params.strain_id} \ --exclude {input.exclude} \ + --include {input.include} \ --output {output.sequences} \ --group-by {params.group_by} \ --sequences-per-group {params.sequences_per_group} \