diff --git a/README.md b/README.md index 3bb3848..a2a88ba 100755 --- a/README.md +++ b/README.md @@ -59,14 +59,14 @@ nextstrain build \ . \ --config s3_src=s3://nextstrain-data/files/workflows/avian-flu/h5n1 \ -pf \ - auspice/avian-flu_h5n1-cattle-outbreak_pb2_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_pb1_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_pa_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_ha_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_np_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_na_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_mp_all-time.json \ - auspice/avian-flu_h5n1-cattle-outbreak_ns_all-time.json + auspice/avian-flu_h5n1-cattle-outbreak_pb2.json \ + auspice/avian-flu_h5n1-cattle-outbreak_pb1.json \ + auspice/avian-flu_h5n1-cattle-outbreak_pa.json \ + auspice/avian-flu_h5n1-cattle-outbreak_ha.json \ + auspice/avian-flu_h5n1-cattle-outbreak_np.json \ + auspice/avian-flu_h5n1-cattle-outbreak_na.json \ + auspice/avian-flu_h5n1-cattle-outbreak_mp.json \ + auspice/avian-flu_h5n1-cattle-outbreak_ns.json ``` ## Creating a custom build diff --git a/Snakefile b/Snakefile index fcc93c5..a13ed57 100755 --- a/Snakefile +++ b/Snakefile @@ -120,7 +120,7 @@ def clock_rate(w): 'h5n1': {'all-time':'', '2y': clock_rates_h5n1[w.segment]}, 'h7n9': {'all-time':''}, 'h9n2': {'all-time':''}, - 'h5n1-cattle-outbreak': {'all-time': clock_rates_h5n1[w.segment]} + 'h5n1-cattle-outbreak': {'default': clock_rates_h5n1[w.segment]} } return clock_rate[w.subtype][w.time] @@ -132,7 +132,7 @@ def clock_rate_std_dev(w): 'h5n1': {'all-time': '', '2y': '--clock-std-dev 0.00211'}, 'h7n9': {'all-time': ''}, 'h9n2': {'all-time': ''}, - 'h5n1-cattle-outbreak': {'all-time': '--clock-std-dev 0.00211'} + 'h5n1-cattle-outbreak': {'default': '--clock-std-dev 0.00211'} } return clock_rate_std_dev[w.subtype][w.time] @@ -399,7 +399,10 @@ def additional_export_config(wildcards): return args rule export: - message: "Exporting 
data files for for auspice" + """ + Export the files into results/ and then use a subsequent rule to move these to the + auspice/ directory + """ input: tree = refined_tree, metadata = metadata_by_wildcards, @@ -409,7 +412,7 @@ rule export: auspice_config = files.auspice_config, description = files.description output: - auspice_json = "auspice/avian-flu_{subtype}_{segment}_{time}.json" + auspice_json = "results/avian-flu_{subtype}_{segment}_{time}.json" params: additional_config = additional_export_config shell: @@ -427,6 +430,46 @@ rule export: --output {output.auspice_json} """ +def auspice_name_to_wildcard_name(wildcards): + """ + Used to link Auspice JSON filenames to their intermediate filename which includes all wildcards. + Examples: + 1. subtype + segment + time in their filename / URL, + e.g. "avian-flu_h5n1_ha_2y.json" (nextstrain.org/avian-flu/h5n1/ha/2y) + maps to subtype=h5n1, segment=ha, time=2y + 2. subtype + segment in their filename / URL, + e.g. "avian-flu_h5n1-cattle-outbreak_ha.json" (nextstrain.org/avian-flu/h5n1-cattle-outbreak/ha) + maps to subtype=h5n1-cattle-outbreak, segment=ha, time=default + """ + parts = wildcards.parts.split("_") + if len(parts)==3: + [subtype, segment, time] = parts + assert segment!='genome', "Genome builds are not available for this build" + return f"results/avian-flu_{subtype}_{segment}_{time}.json" + if len(parts)==2: + [subtype, segment] = parts + assert subtype=='h5n1-cattle-outbreak', "Only h5n1 builds produce an Auspice dataset without a time component in the filename" + return f"results/avian-flu_{subtype}_{segment}_default.json" + raise Exception("Auspice JSON filename requested with an unexpected number of (underscore-separated) parts") + + +rule rename_auspice_datasets: + """ + This allows us to create files in auspice/ which mirror the intended URL structure rather than + the wildcard structure we use in the workflow. 
+ """ + input: + json = auspice_name_to_wildcard_name + output: + json = "auspice/avian-flu_{parts}.json" + wildcard_constraints: + timepart = ".*" + shell: + """ + cp {input.json} {output.json} + """ + + rule clean: message: "Removing directories: {params}" params: diff --git a/rules/cattle-flu.smk b/rules/cattle-flu.smk index 8163e11..5489d46 100644 --- a/rules/cattle-flu.smk +++ b/rules/cattle-flu.smk @@ -11,7 +11,7 @@ rule download_tree: dataset="https://data.nextstrain.org/avian-flu_h5n1-cattle-outbreak_genome.json" wildcard_constraints: subtype="h5n1-cattle-outbreak", - time="all-time", + time="default", shell: """ curl --compressed {params.dataset} -o {output.tree} @@ -27,7 +27,7 @@ rule prune_tree: node_data = "results/tree_{subtype}_{segment}_{time}_outbreak-clade.json", wildcard_constraints: subtype="h5n1-cattle-outbreak", - time="all-time", + time="default", shell: """ python3 scripts/restrict-via-common-ancestor.py \