From 3283530368bc990b3a4ebbf459d0f44f7aa433db Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Fri, 19 Jan 2024 11:53:01 -0800 Subject: [PATCH] Move fetch_sequences.smk to fetch_from_ncbi.smk This is a more accurate name for the rule, since it fetches from NCBI and matches the pathogen-repo-template/ingest/ncbi_fetch_sequences.smk rule. --- ingest/Snakefile | 2 +- ingest/rules/{fetch_sequences.smk => fetch_from_ncbi.smk} | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) rename ingest/rules/{fetch_sequences.smk => fetch_from_ncbi.smk} (94%) diff --git a/ingest/Snakefile b/ingest/Snakefile index fb26cc1..8c916ce 100644 --- a/ingest/Snakefile +++ b/ingest/Snakefile @@ -54,7 +54,7 @@ rule all: _get_all_targets, -include: "rules/fetch_sequences.smk" +include: "rules/fetch_from_ncbi.smk" include: "rules/transform.smk" diff --git a/ingest/rules/fetch_sequences.smk b/ingest/rules/fetch_from_ncbi.smk similarity index 94% rename from ingest/rules/fetch_sequences.smk rename to ingest/rules/fetch_from_ncbi.smk index 2fef4b1..c775819 100644 --- a/ingest/rules/fetch_sequences.smk +++ b/ingest/rules/fetch_from_ncbi.smk @@ -7,6 +7,10 @@ defined in the config. If adding other sources, add a new rule upstream of rule `fetch_all_sequences` to create the file `data/{source}.ndjson` or the file must exist as a static file in the repo. +Fetch with NCBI Datasets (https://www.ncbi.nlm.nih.gov/datasets/) + - requires `ncbi_taxon_id` config + - Only returns metadata fields that are available through NCBI Datasets + Produces final output as sequences_ndjson = "data/sequences.ndjson"