Skip to content

Commit

Permalink
fixup: Move transform.smk to curate.smk
Browse files: browse the repository at this point in the history
  • Loading branch information
j23414 committed Jan 20, 2024
1 parent 115fad2 commit 5fa511a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 24 deletions.
4 changes: 2 additions & 2 deletions ingest/config/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ ncbi_datasets_fields:
- submitter-affiliation
- submitter-country

# Params for the transform rule
transform:
# Params for the curate rule
curate:
# NCBI fields to rename to Nextstrain field names.
# This is the first step in the pipeline, so any references to field names
# in the configs below should use the new field names
Expand Down
44 changes: 22 additions & 22 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
This part of the workflow handles transforming the data into standardized
This part of the workflow handles curating the data into standardized
formats and expects input file
sequences_ndjson = "data/sequences.ndjson"
Expand All @@ -9,15 +9,15 @@ This will produce output files as
metadata = "results/metadata.tsv"
sequences = "results/sequences.fasta"
Parameters are expected to be defined in `config.transform`.
Parameters are expected to be defined in `config.curate`.
"""


rule fetch_general_geolocation_rules:
output:
general_geolocation_rules="data/general-geolocation-rules.tsv",
params:
geolocation_rules_url=config["transform"]["geolocation_rules_url"],
geolocation_rules_url=config["curate"]["geolocation_rules_url"],
shell:
"""
curl {params.geolocation_rules_url} > {output.general_geolocation_rules}
Expand All @@ -27,7 +27,7 @@ rule fetch_general_geolocation_rules:
rule concat_geolocation_rules:
input:
general_geolocation_rules="data/general-geolocation-rules.tsv",
local_geolocation_rules=config["transform"]["local_geolocation_rules"],
local_geolocation_rules=config["curate"]["local_geolocation_rules"],
output:
all_geolocation_rules="data/all-geolocation-rules.tsv",
shell:
Expand All @@ -36,32 +36,32 @@ rule concat_geolocation_rules:
"""


rule transform:
rule curate:
input:
sequences_ndjson="data/sequences.ndjson",
all_geolocation_rules="data/all-geolocation-rules.tsv",
annotations=config["transform"]["annotations"],
annotations=config["curate"]["annotations"],
output:
metadata="results/metadata.tsv",
sequences="results/sequences.fasta",
log:
"logs/transform.txt",
"logs/curate.txt",
params:
field_map=config["transform"]["field_map"],
strain_regex=config["transform"]["strain_regex"],
strain_backup_fields=config["transform"]["strain_backup_fields"],
date_fields=config["transform"]["date_fields"],
expected_date_formats=config["transform"]["expected_date_formats"],
articles=config["transform"]["titlecase"]["articles"],
abbreviations=config["transform"]["titlecase"]["abbreviations"],
titlecase_fields=config["transform"]["titlecase"]["fields"],
authors_field=config["transform"]["authors_field"],
authors_default_value=config["transform"]["authors_default_value"],
abbr_authors_field=config["transform"]["abbr_authors_field"],
annotations_id=config["transform"]["annotations_id"],
metadata_columns=config["transform"]["metadata_columns"],
id_field=config["transform"]["id_field"],
sequence_field=config["transform"]["sequence_field"],
field_map=config["curate"]["field_map"],
strain_regex=config["curate"]["strain_regex"],
strain_backup_fields=config["curate"]["strain_backup_fields"],
date_fields=config["curate"]["date_fields"],
expected_date_formats=config["curate"]["expected_date_formats"],
articles=config["curate"]["titlecase"]["articles"],
abbreviations=config["curate"]["titlecase"]["abbreviations"],
titlecase_fields=config["curate"]["titlecase"]["fields"],
authors_field=config["curate"]["authors_field"],
authors_default_value=config["curate"]["authors_default_value"],
abbr_authors_field=config["curate"]["abbr_authors_field"],
annotations_id=config["curate"]["annotations_id"],
metadata_columns=config["curate"]["metadata_columns"],
id_field=config["curate"]["id_field"],
sequence_field=config["curate"]["sequence_field"],
shell:
"""
(cat {input.sequences_ndjson} \
Expand Down

0 comments on commit 5fa511a

Please sign in to comment.