From 0ff6b2d7d14b9a148a7b6659db4216240ccae28f Mon Sep 17 00:00:00 2001 From: hadleyking Date: Thu, 4 Apr 2024 10:46:25 -0400 Subject: [PATCH] Update test DB and testing functions Changes to be committed: modified: config/fixtures/local_data.json modified: tests/fixtures/test_data.json deleted: tests/test_apis/test_api_objects_drafts_create.py deleted: tests/test_apis/test_auth_add.py deleted: tests/test_apis/test_auth_remove.py deleted: tests/test_apis/test_auth_reset_token.py renamed: tests/test_apis/test_objects_drafts_create.py -> tests/test_apis/test_biocompute/objects_drafts_create.py deleted: tests/test_apis/test_prefixes_create.py deleted: tests/test_apis/test_prefixes_modify.py --- config/fixtures/local_data.json | 3264 +++++++++++------ tests/fixtures/test_data.json | 3172 +++++++++++----- .../test_api_objects_drafts_create.py | 135 - tests/test_apis/test_auth_add.py | 60 - tests/test_apis/test_auth_remove.py | 51 - tests/test_apis/test_auth_reset_token.py | 34 - .../objects_drafts_create.py} | 0 tests/test_apis/test_prefixes_create.py | 154 - tests/test_apis/test_prefixes_modify.py | 149 - 9 files changed, 4362 insertions(+), 2657 deletions(-) delete mode 100644 tests/test_apis/test_api_objects_drafts_create.py delete mode 100644 tests/test_apis/test_auth_add.py delete mode 100644 tests/test_apis/test_auth_remove.py delete mode 100644 tests/test_apis/test_auth_reset_token.py rename tests/test_apis/{test_objects_drafts_create.py => test_biocompute/objects_drafts_create.py} (100%) delete mode 100644 tests/test_apis/test_prefixes_create.py delete mode 100644 tests/test_apis/test_prefixes_modify.py diff --git a/config/fixtures/local_data.json b/config/fixtures/local_data.json index 2bc2334..347beb9 100644 --- a/config/fixtures/local_data.json +++ b/config/fixtures/local_data.json @@ -1,4 +1,17 @@ [ + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2024-04-03T11:36:31.006Z", + "user": 7, + "content_type": 12, + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "object_repr": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Authorized users\"]}}]" + } + }, { "model": "auth.permission", "pk": 1, @@ -738,7 +751,7 @@ "pk": 7, "fields": { "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", - "last_login": "2024-04-03T10:39:32Z", + "last_login": "2024-04-03T10:58:13.425Z", "is_superuser": true, "username": "bco_api_user", "first_name": "", @@ -758,115 +771,19 @@ } }, { - "model": "contenttypes.contenttype", - "pk": 1, - "fields": { - "app_label": "admin", - "model": "logentry" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 2, - "fields": { - "app_label": "auth", - "model": "permission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 3, - "fields": { - "app_label": "auth", - "model": "group" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 4, - "fields": { - "app_label": "auth", - "model": "user" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 5, - "fields": { - "app_label": "contenttypes", - "model": "contenttype" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 6, - "fields": { - "app_label": "sessions", - "model": "session" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 7, - "fields": { - "app_label": "authtoken", - "model": "token" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 8, - "fields": { - "app_label": "authtoken", - "model": "tokenproxy" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 9, - "fields": { - "app_label": "blacklist", - "model": "blacklistedtoken" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 10, - "fields": { - "app_label": "authentication", - "model": "authentication" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 11, - "fields": { - "app_label": "authentication", - "model": "newuser" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 12, - "fields": { - "app_label": "biocompute", - "model": "bco" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 13, + "model": "authtoken.token", + "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", "fields": { - "app_label": "prefix", - "model": "prefix" + "user": 5, + "created": "2024-03-14T15:21:04.318Z" } }, { "model": "authtoken.token", - "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", + "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", "fields": { - "user": 5, - "created": "2024-03-14T15:21:04.318Z" + "user": 7, + "created": "2024-04-03T10:53:08.951Z" } }, { @@ -893,14 +810,6 @@ "created": "2024-03-14T15:21:09.348Z" } }, - { - "model": "authtoken.token", - "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", - "fields": { - "user": 7, - "created": "2024-04-03T10:53:08.951Z" - } - }, { "model": "authentication.authentication", "pk": 1, @@ -930,22 +839,31 @@ "pk": "http://127.0.0.1:8000/BCO_000000/1.0", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.3", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:06:43.263Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -954,89 +872,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.1" + ] }, { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.1" + ] } ] }, @@ -1044,26 +956,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -1072,25 +987,42 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", "dataset_extension": { "additional_license": { "data_license": "https://creativecommons.org/licenses/by/4.0/", @@ -1098,28 +1030,28 @@ }, "dataset_categories": [ { - "category_value": "Influenza A", + "category_value": "Other", "category_name": "species" }, { - "category_value": "nucleotide", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "Influenza A", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" }, { - "category_value": "fasta", + "category_value": "tsv", "category_name": "file_type" }, { "category_value": "reviewed", "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" } ] } @@ -1129,8 +1061,8 @@ "prefix": "BCO", "owner": "bco_api_user", "state": "PUBLISHED", - "last_update": "2024-04-03T10:47:13Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 7, "authorized_users": [] } }, @@ -1139,22 +1071,31 @@ "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.1", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:10:12.804Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -1163,89 +1104,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.1" + ] }, { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.1" + ] } ] }, @@ -1253,26 +1188,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -1281,25 +1219,42 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", "dataset_extension": { "additional_license": { "data_license": "https://creativecommons.org/licenses/by/4.0/", @@ -1307,28 +1262,28 @@ }, "dataset_categories": [ { - "category_value": "Influenza A", + "category_value": "Other", "category_name": "species" }, { - "category_value": "nucleotide", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "Influenza A", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" }, { - "category_value": "fasta", + "category_value": "tsv", "category_name": "file_type" }, { "category_value": "reviewed", "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" } ] } @@ -1338,8 +1293,8 @@ "prefix": "BCO", "owner": "bco_api_user", "state": "DRAFT", - "last_update": "2024-04-03T10:45:47Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 23, "authorized_users": [] } }, @@ -1348,161 +1303,114 @@ "pk": "http://127.0.0.1:8000/BCO_000001/1.0", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { "contribution": [ "createdBy", "curatedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" }, { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", + "name": "Raja Mazumder", "contribution": [ - "authoredBy" - ] + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "review": [] }, "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" }, { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -1512,190 +1420,104 @@ "script": [ { "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] @@ -1703,7 +1525,7 @@ "prefix": "BCO", "owner": "tester", "state": "PUBLISHED", - "last_update": "2024-04-03T10:47:56Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } @@ -1713,161 +1535,114 @@ "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.1", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { "contribution": [ - "createdBy", - "curatedBy" + "curatedBy", + "contributedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" }, { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", "contribution": [ - "authoredBy" - ] - } - ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] }, "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" }, { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -1877,190 +1652,104 @@ "script": [ { "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] @@ -2068,7 +1757,7 @@ "prefix": "BCO", "owner": "tester", "state": "DRAFT", - "last_update": "2024-04-03T10:35:25Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } @@ -2078,124 +1767,1558 @@ "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0", - "created": "2018-11-29T11:29:08-0500", - "modified": "2022-06-28T23:19:38.283Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { "contribution": [ - "createdBy", "curatedBy", - "authoredBy" + "contributedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" }, { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", "contribution": [ "createdBy", - "curatedBy", - "authoredBy" + "curatedBy" ], - "orcid": "https://orcid.org/0000-0001-88238-9945" + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "review": [] }, "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } ], - "platform": [ - "hive" + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0d692b94bc2528660470e38c78708cc06605f941", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2024-04-04T12:53:33", + "modified": "2024-04-04T12:53:33.679Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [], + "error_domain": {} + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T13:00:38.650Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 3, + "authorized_users": [ + 5 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "bco_api_user", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -2205,192 +3328,146 @@ "script": [ { "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] }, - "prefix": "BCO", + "prefix": "TEST", "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:35:59Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "pk": "http://127.0.0.1:8000/TEST_000005/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:47.218Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -2399,89 +3476,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.0" + ] }, { "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.0" + ] } ] }, @@ -2489,26 +3560,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3.10.0", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -2517,22 +3591,39 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", @@ -2543,59 +3634,57 @@ }, "dataset_categories": [ { - "category_value": "SARS-CoV-2", + "category_value": "Other", "category_name": "species" }, { - "category_value": "protein", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "SARS-CoV-2", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" }, { - "category_value": "fasta", - "category_name": "file_type" + "category_value": "Dictionary", + "category_name": "species" }, { - "category_value": "non-core", - "category_name": "priority" + "category_value": "tsv", + "category_name": "file_type" }, { "category_value": "reviewed", "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" } ] } } ] }, - "prefix": "BCO", - "owner": "jdoe", + "prefix": "TEST", + "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:44:53Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "pk": "http://127.0.0.1:8000/TEST_000006/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { "name": "ARGOSdb QC related annotation data property list", - "version": "1.0", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ @@ -2636,7 +3725,7 @@ "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + "review": [] }, "usability_domain": [ "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", @@ -2651,6 +3740,7 @@ "ontology", "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, @@ -2804,17 +3894,17 @@ } ] }, - "prefix": "NOPUB", - "owner": "tester", - "state": "PUBLISHED", - "last_update": "2024-04-03T10:49:17Z", - "access_count": 0, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "pk": "http://127.0.0.1:8000/TEST_000007/DRAFT", "fields": { "contents": { "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", @@ -2822,9 +3912,11 @@ "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", "created": "2022-02-07T17:36:05.872Z", "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ @@ -2865,7 +3957,7 @@ "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + "review": [] }, "usability_domain": [ "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", @@ -3034,11 +4126,11 @@ } ] }, - "prefix": "NOPUB", - "owner": "tester", + "prefix": "TEST", + "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:46:35Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, "authorized_users": [] } }, @@ -3050,7 +4142,7 @@ "created": "2024-03-14T13:53:59Z", "description": "Default prefix for all BioCompute Objects", "owner": "AnonymousUser", - "counter": 0, + "counter": 4, "public": true } }, @@ -3074,7 +4166,7 @@ "created": "2024-03-14T13:53:59Z", "description": "Test prefix", "owner": "tester", - "counter": 0, + "counter": 7, "public": true } } diff --git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index c3ec255..347beb9 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -1,4 +1,17 @@ [ + { + "model": "admin.logentry", + "pk": 1, + "fields": { + "action_time": "2024-04-03T11:36:31.006Z", + "user": 7, + "content_type": 12, + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "object_repr": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "action_flag": 2, + "change_message": "[{\"changed\": {\"fields\": [\"Authorized users\"]}}]" + } + }, { "model": "auth.permission", "pk": 1, @@ -478,7 +491,7 @@ }, { "model": "auth.permission", - "pk": 67, + "pk": 54, "fields": { "name": "Can add BCOs with prefix NOPUB", "content_type": 13, @@ -487,7 +500,7 @@ }, { "model": "auth.permission", - "pk": 68, + "pk": 55, "fields": { "name": "Can change BCOs with prefix NOPUB", "content_type": 13, @@ -496,7 +509,7 @@ }, { "model": "auth.permission", - "pk": 69, + "pk": 56, "fields": { "name": "Can delete BCOs with prefix NOPUB", "content_type": 13, @@ -505,7 +518,7 @@ }, { "model": "auth.permission", - "pk": 70, + "pk": 57, "fields": { "name": "Can publish BCOs with prefix NOPUB", "content_type": 13, @@ -514,7 +527,7 @@ }, { "model": "auth.permission", - "pk": 71, + "pk": 58, "fields": { "name": "Can add new user", "content_type": 11, @@ -523,7 +536,7 @@ }, { "model": "auth.permission", - "pk": 72, + "pk": 59, "fields": { "name": "Can change new user", "content_type": 11, @@ -532,7 +545,7 @@ }, { "model": "auth.permission", - "pk": 73, + "pk": 60, "fields": { "name": "Can delete new user", "content_type": 11, @@ -738,7 +751,7 @@ "pk": 7, "fields": { "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", - "last_login": "2024-04-03T10:39:32Z", + "last_login": "2024-04-03T10:58:13.425Z", "is_superuser": true, "username": "bco_api_user", "first_name": "", @@ -765,6 +778,14 @@ "created": "2024-03-14T15:21:04.318Z" } }, + { + "model": "authtoken.token", + "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", + "fields": { + "user": 7, + "created": "2024-04-03T10:53:08.951Z" + } + }, { "model": "authtoken.token", "pk": "705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", @@ -789,14 +810,6 @@ "created": "2024-03-14T15:21:09.348Z" } }, - { - "model": "authtoken.token", - "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", - "fields": { - "user": 7, - "created": "2024-04-03T10:53:08.951Z" - } - }, { "model": "authentication.authentication", "pk": 1, @@ -826,22 +839,31 @@ "pk": "http://127.0.0.1:8000/BCO_000000/1.0", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.3", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:06:43.263Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -850,89 +872,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.1" + ] }, { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.1" + ] } ] }, @@ -940,26 +956,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -968,25 +987,42 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", "dataset_extension": { "additional_license": { "data_license": "https://creativecommons.org/licenses/by/4.0/", @@ -994,28 +1030,28 @@ }, "dataset_categories": [ { - "category_value": "Influenza A", + "category_value": "Other", "category_name": "species" }, { - "category_value": "nucleotide", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "Influenza A", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" }, { - "category_value": "fasta", + "category_value": "tsv", "category_name": "file_type" }, { "category_value": "reviewed", "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" } ] } @@ -1025,8 +1061,8 @@ "prefix": "BCO", "owner": "bco_api_user", "state": "PUBLISHED", - "last_update": "2024-04-03T10:47:13Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 7, "authorized_users": [] } }, @@ -1035,22 +1071,31 @@ "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Influenza A reference gene sequences", - "version": "1.1", - "created": "2021-12-01T15:20:13.614Z", - "modified": "2022-06-28T23:10:12.804Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -1059,89 +1104,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "Influenza A, Complete Genome, FASTA, Genes" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { - "step_number": 0, - "name": "Download files from UniProt", - "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", - "access_time": "2021-12-01T15:20:13.614Z" - } - } - ], + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", - "filename": "UP000009255_211044_DNA.fasta.gz", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.1" + ] }, { - "step_number": 0, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "Influenza genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", - "filename": "influenza_UP000009255_genome_sequences.json" - } - } - ], + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", - "filename": "influenza_UP000009255_211044_DNA.fasta", - "access_time": "2021-12-01T15:20:13.614Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.1" + ] } ] }, @@ -1149,26 +1188,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -1177,25 +1219,42 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", - "filename": "UP000009255_211044_DNA.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { - "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", "dataset_extension": { "additional_license": { "data_license": "https://creativecommons.org/licenses/by/4.0/", @@ -1203,28 +1262,28 @@ }, "dataset_categories": [ { - "category_value": "Influenza A", + "category_value": "Other", "category_name": "species" }, { - "category_value": "nucleotide", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "Influenza A", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" }, { - "category_value": "fasta", - "category_name": "file_type" - }, + "category_value": "Dictionary", + "category_name": "species" + }, { - "category_value": "reviewed", - "category_name": "status" + "category_value": "tsv", + "category_name": "file_type" }, { - "category_value": "internal", - "category_name": "scope" + "category_value": "reviewed", + "category_name": "status" } ] } @@ -1234,8 +1293,8 @@ "prefix": "BCO", "owner": "bco_api_user", "state": "DRAFT", - "last_update": "2024-04-03T10:45:47Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 23, "authorized_users": [] } }, @@ -1244,161 +1303,114 @@ "pk": "http://127.0.0.1:8000/BCO_000001/1.0", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.0", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { "contribution": [ "createdBy", "curatedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" }, { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", + "name": "Raja Mazumder", "contribution": [ - "authoredBy" - ] + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "review": [] }, "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" }, { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -1408,190 +1420,104 @@ "script": [ { "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] @@ -1599,7 +1525,7 @@ "prefix": "BCO", "owner": "tester", "state": "PUBLISHED", - "last_update": "2024-04-03T10:47:56Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } @@ -1609,161 +1535,114 @@ "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "HCV1a ledipasvir resistance SNP detection", - "version": "1.1", - "created": "2017-01-24T09:40:17-0500", - "modified": "2022-06-28T23:12:50.369Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", - "date": "2017-11-12T12:30:48-0400", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - }, - { - "status": "approved", - "reviewer_comment": "The revised BCO looks fine", - "date": "2017-12-12T12:30:48-0400", - "reviewer": { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", - "contribution": [ - "curatedBy" - ] - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { "contribution": [ "createdBy", "curatedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" }, { - "name": "Eric Donaldson", - "affiliation": "FDA", - "email": "Eric.Donaldson@fda.hhs.gov", + "name": "Raja Mazumder", "contribution": [ - "authoredBy" - ] + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "review": [] }, "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", - "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "HCV1a", - "Ledipasvir", - "antiviral resistance", - "SNP", - "amino acid substitutions" - ], - "platform": [ - "HIVE" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", - "prerequisite": [ - { - "name": "Hepatitis C virus genotype 1", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus type 1b complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ { - "name": "Hepatitis C virus clone J8CF, complete genome", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" }, { - "name": "Hepatitis C virus S52 polyprotein gene", - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" - } - } - ], - "input_list": [ - { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "http://example.com/data/514769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-heptagon", - "description": "variant calling", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "http://example.com/data/514801/SNPProfile.csv", - "access_time": "2017-01-24T09:40:17-0500" - }, - { - "uri": "http://example.com/data/14769/allCount-aligned.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -1773,190 +1652,104 @@ "script": [ { "uri": { - "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "HIVE-hexagon", - "version": "babajanian.1", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500", - "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" - } - }, - { - "name": "HIVE-heptagon", - "version": "albinoni.2", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "http://example.com/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "HOSTTYPE": "x86_64-linux", - "EDITOR": "vim" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "1" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "1" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "1" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", - "fhir_extension": [ - { - "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", - "fhir_version": "3", - "fhir_resources": [ - { - "fhir_resource": "Sequence", - "fhir_id": "21376" - }, - { - "fhir_resource": "DiagnosticReport", - "fhir_id": "6288583" - }, - { - "fhir_resource": "ProcedureRequest", - "fhir_id": "25544" - }, - { - "fhir_resource": "Observation", - "fhir_id": "92440" - }, - { - "fhir_resource": "FamilyMemberHistory", - "fhir_id": "4588936" - } - ] - } - ] - }, - { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/example/repo1", - "scm_type": "git", - "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", - "scm_path": "workflow/hive-viral-mutation-detection.cwl", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] @@ -1964,7 +1757,7 @@ "prefix": "BCO", "owner": "tester", "state": "DRAFT", - "last_update": "2024-04-03T10:35:25Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } @@ -1974,124 +1767,1558 @@ "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "Healthy human fecal metagenomic diversity", - "version": "1.0", - "created": "2018-11-29T11:29:08-0500", - "modified": "2022-06-28T23:19:38.283Z", - "review": [ - { - "status": "approved", - "reviewer_comment": "Approved by GW staff.", - "reviewer": { - "name": "Charles Hadley King", - "affiliation": "George Washington University", - "email": "hadley_king@gwu.edu", - "contribution": [ - "curatedBy" - ], - "orcid": "https://orcid.org/0000-0003-1409-4549" - } - } - ], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], "name": "Charles Hadley King", "affiliation": "George Washington University", "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { "contribution": [ - "createdBy", "curatedBy", - "authoredBy" + "contributedBy" ], - "orcid": "https://orcid.org/0000-0003-1409-4549" + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" }, { - "name": "Raja Mazumder", - "affiliation": "George Washington University", - "email": "mazumder@gwu.edu", "contribution": [ "createdBy", - "curatedBy", - "authoredBy" + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" ], - "orcid": "https://orcid.org/0000-0001-88238-9945" + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://spdx.org/licenses/CC-BY-4.0.html" + "review": [] }, "usability_domain": [ - "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", - "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "metagenome", - "metagenomic analysis", - "fecal" + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } ], - "platform": [ - "hive" + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000004/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0d692b94bc2528660470e38c78708cc06605f941", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2024-04-04T12:53:33", + "modified": "2024-04-04T12:53:33.679Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [], + "error_domain": {} + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T13:00:38.650Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 3, + "authorized_users": [ + 5 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "bco_api_user", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 0, + "authorized_users": [ + 4 + ] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 2, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/TEST_000004/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "review": [] + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "CensuScope", - "description": "Detect taxonomic composition of a metagenomic data set.", - "version": "1.3", - "prerequisite": [ - { - "name": "Filtered_NT_feb18_2016", - "uri": { - "uri": "https://hive.biochemistry.gwu.edu/genome/513957", - "access_time": "2016-11-30T06:46-0500" - } - } - ], + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" }, { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } ] }, { "step_number": 2, - "name": "HIVE-hexagon", - "description": "Alignment of reads to a set of references", - "version": "1.3", + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "http://example.com/data/546223/dnaAccessionBased.csv", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", - "access_time": "2016-11-30T06:46-0500" - }, - { - "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", - "access_time": "2016-11-30T06:46-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } ] } @@ -2101,192 +3328,146 @@ "script": [ { "uri": { - "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "shell", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "CensuScope", - "version": "albinoni.2", - "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "name": "HIVE-hexagon", - "version": "babajanian.1", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "HIVE", - "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" }, { - "name": "access to e-utils", - "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], - "environment_variables": { - "key": "HOSTTYPE", - "value": "x86_64-linux" - } + "environment_variables": {} }, "io_domain": { "input_subdomain": [ { "uri": { - "filename": "Hepatitis C virus genotype 1", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus type 1b complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus clone J8CF, complete genome", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "uri": { - "filename": "Hepatitis C virus S52 polyprotein gene", - "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-01", - "uri": "http://example.com/nuc-read/514682", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" } }, { "uri": { - "filename": "HCV1a_drug_resistant_sample0001-02", - "uri": "http://example.com/nuc-read/514683", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/csv", - "uri": { - "uri": "http://example.com/data/514769/dnaAccessionBased.csv", - "access_time": "2017-01-24T09:40:17-0500" - } - }, - { - "mediatype": "text/csv", + "mediatype": "text/tsv", "uri": { - "uri": "http://example.com/data/514801/SNPProfile*.csv", - "access_time": "2017-01-24T09:40:17-0500" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, - "parametric_domain": [ - { - "param": "seed", - "value": "14", - "step": "2" - }, - { - "param": "minimum_match_len", - "value": "66", - "step": "2" - }, - { - "param": "divergence_threshold_percent", - "value": "0.30", - "step": "2" - }, - { - "param": "minimum_coverage", - "value": "15", - "step": "2" - }, - { - "param": "freq_cutoff", - "value": "0.10", - "step": "2" - } - ], + "parametric_domain": [], "error_domain": { - "empirical_error": { - "false_negative_alignment_hits": "<0.0010", - "false_discovery": "<0.05" - }, - "algorithmic_error": { - "false_positive_mutation_calls_discovery": "<0.00005", - "false_discovery": "0.005" - } + "empirical_error": {}, + "algorithmic_error": {} }, "extension_domain": [ { - "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", - "scm_extension": { - "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", - "scm_type": "git", - "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", - "scm_path": "biocompute-objects/HIVE_metagenomics", - "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] } } ] }, - "prefix": "BCO", + "prefix": "TEST", "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:35:59Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "pk": "http://127.0.0.1:8000/TEST_000005/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { - "name": "SARS-CoV-2 reference proteome sequences", - "version": "1.0", - "created": "2021-12-16T21:06:50.969977Z", - "modified": "2022-06-28T23:21:47.218Z", - "review": [], + "name": "ARGOSdb QC related annotation data property list", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ "createdBy", "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ "curatedBy", - "importedBy", "contributedBy" ], "name": "Stephanie Singleton", @@ -2295,89 +3476,83 @@ }, { "contribution": [ - "createdBy" + "createdBy", + "curatedBy" ], "name": "Jonathon Keeney", "affiliation": "The George Washington University ", "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "MIT" + "review": [] }, "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." ], "description_domain": { "keywords": [ - "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + "curation", + "definitions", + "ontology", + "controlled vocabulary" ], "platform": [], "pipeline_steps": [ { "step_number": 1, - "name": "Download all available files from UniProt", - "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", - "prerequisite": [ - { - "name": "UniProt reference page ", - "uri": { - "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", - "access_time": "2021-12-16T21:06:50.969977Z" - } - } - ], + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", - "filename": "UP000464024_2697049.fasta.gz", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" } - ], - "version": "1.0" + ] }, { "step_number": 2, - "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", - "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", - "prerequisite": [ - { - "name": "Dataset-maker python script", - "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" - } - }, - { - "name": "SARS-CoV-2 genome FASTA recipe", - "uri": { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", - "filename": "sars-cov-2_UP000464024_proteome_sequences.json" - } - } - ], + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], "input_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" } ], "output_list": [ { - "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", - "filename": "sars-cov-2_UP000464024_2697049.fasta", - "access_time": "2021-12-16T21:06:50.969977Z" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } - ], - "version": "1.0" + ] } ] }, @@ -2385,26 +3560,29 @@ "script": [ { "uri": { - "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", - "filename": "make-dataset.py" + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" } } ], - "script_driver": "python3", + "script_driver": "Google Drive/Sheets", "software_prerequisites": [ { - "name": "Python", - "version": "3.10.0", + "name": "Microsof Excel", + "version": "16.57", "uri": { - "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", - "filename": "" + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" } } ], "external_data_endpoints": [ { - "name": "python-3.10.0", - "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" } ], "environment_variables": {} @@ -2413,22 +3591,39 @@ "input_subdomain": [ { "uri": { - "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta.gz" + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" } } ], "output_subdomain": [ { - "mediatype": "text/plain", + "mediatype": "text/tsv", "uri": { - "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", - "filename": "UP000464024_2697049.fasta" + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" } } ] }, "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, "extension_domain": [ { "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", @@ -2439,59 +3634,57 @@ }, "dataset_categories": [ { - "category_value": "SARS-CoV-2", + "category_value": "Other", "category_name": "species" }, { - "category_value": "protein", + "category_value": "Other", "category_name": "molecule" }, { - "category_value": "SARS-CoV-2", - "category_name": "tag" + "category_value": "non-core", + "category_name": "priority" }, { - "category_value": "fasta", - "category_name": "file_type" + "category_value": "Dictionary", + "category_name": "species" }, { - "category_value": "non-core", - "category_name": "priority" + "category_value": "tsv", + "category_name": "file_type" }, { "category_value": "reviewed", "category_name": "status" - }, - { - "category_value": "internal", - "category_name": "scope" } ] } } ] }, - "prefix": "BCO", - "owner": "jdoe", + "prefix": "TEST", + "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:44:53Z", + "last_update": "2024-04-04T04:34:54.867Z", "access_count": 0, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "pk": "http://127.0.0.1:8000/TEST_000006/DRAFT", "fields": { "contents": { - "object_id": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { "name": "ARGOSdb QC related annotation data property list", - "version": "1.0", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", "created": "2022-02-07T17:36:05.872Z", - "modified": "2022-02-15T14:35:54.116922", + "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ @@ -2532,7 +3725,7 @@ "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + "review": [] }, "usability_domain": [ "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", @@ -2547,6 +3740,7 @@ "ontology", "controlled vocabulary" ], + "platform": [], "pipeline_steps": [ { "step_number": 1, @@ -2700,17 +3894,17 @@ } ] }, - "prefix": "NOPUB", - "owner": "tester", - "state": "PUBLISHED", - "last_update": "2024-04-03T10:49:17Z", - "access_count": 0, + "prefix": "TEST", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, "authorized_users": [] } }, { "model": "biocompute.bco", - "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "pk": "http://127.0.0.1:8000/TEST_000007/DRAFT", "fields": { "contents": { "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", @@ -2718,9 +3912,11 @@ "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", "provenance_domain": { "name": "ARGOSdb QC related annotation data property list", - "version": "1.2", + "version": "1.21", + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE", "created": "2022-02-07T17:36:05.872Z", "modified": "2022-06-28T23:44:49.394Z", + "derived_from": [], "contributors": [ { "contribution": [ @@ -2761,7 +3957,7 @@ "orcid": "https://orcid.org/0000-0001-8823-9945" } ], - "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + "review": [] }, "usability_domain": [ "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", @@ -2930,11 +4126,11 @@ } ] }, - "prefix": "NOPUB", - "owner": "tester", + "prefix": "TEST", + "owner": "hivelab", "state": "DRAFT", - "last_update": "2024-04-03T10:46:35Z", - "access_count": 0, + "last_update": "2024-04-04T04:34:54.867Z", + "access_count": 1, "authorized_users": [] } }, @@ -2946,7 +4142,7 @@ "created": "2024-03-14T13:53:59Z", "description": "Default prefix for all BioCompute Objects", "owner": "AnonymousUser", - "counter": 0, + "counter": 4, "public": true } }, @@ -2970,7 +4166,7 @@ "created": "2024-03-14T13:53:59Z", "description": "Test prefix", "owner": "tester", - "counter": 0, + "counter": 7, "public": true } } diff --git a/tests/test_apis/test_api_objects_drafts_create.py b/tests/test_apis/test_api_objects_drafts_create.py deleted file mode 100644 index b294e33..0000000 --- a/tests/test_apis/test_api_objects_drafts_create.py +++ /dev/null @@ -1,135 +0,0 @@ - -#!/usr/bin/env python3 - -"""Objects/Drafts_create -Tests for 'Creation of BCO draft is successful.' (200), -returns 207, 403 (needs to be reviewed) -""" - - -import json -from django.test import TestCase -from django.contrib.auth.models import User -from rest_framework.authtoken.models import Token -from rest_framework.test import APIClient - -class BcoDraftCreateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - self.token = Token.objects.get(user=User.objects.get(username="tester")) - - self.legacy_data = { - "POST_api_objects_draft_create": [ - { - "prefix": "BCO", - "owner_group": "tester", - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", - "schema": "IEEE", - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - } - - self.data = [ - { - "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "prefix": "BCO", - "authorized_users": ["hivelab"], - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - }, - { - "object_id": "http://127.0.0.1:8000/TEST_000001", - "prefix": "TEST", - "contents": { - "object_id": "https://biocomputeobject.org/TEST_000001", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - - def test_legacy_successful_creation(self): - """200: Creation of BCO drafts is successful. - """ - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', self.legacy_data, format='json') - self.assertEqual(response.status_code, 200) - - def test_successful_creation(self): - """200: Creation of BCO drafts is successful. - """ - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', self.data, format='json') - self.assertEqual(response.status_code, 200) - - def test_partial_failure(self): - # Test case for partial failure (response code 300) - ##Returns 207(Multi status) instead of 300(Partial faliure) - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - { - 'prefix': 'Reeyaa', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 207) - - def test_bad_request(self): - # Test case for bad request (response code 400) - #Gives 403 forbidden request instead of 400 - data = [ - { - "object_id": "http://127.0.0.1:8000/TEST_000001", - "prefix": "TEST", - "contents": { - "object_id": "https://biocomputeobject.org/TEST_000001", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 400) - - def test_invalid_token(self): - # Test case for invalid token (response code 403) - # Setting authentication token to an invalid value - - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_auth_add.py b/tests/test_apis/test_auth_add.py deleted file mode 100644 index a7843b3..0000000 --- a/tests/test_apis/test_auth_add.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 - -"""Add Authentication -Tests for 'New authentication credentials added to existing object' (200), -'Authentication credentials were created and added' (201), 'Bad request' (400), -'That object already exists for this account' (409) -""" - -from django.test import TestCase, Client -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from authentication.models import Authentication - -class AuthenticationTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_credentials_created_response(self): - """Add authentication is successful (200) - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data) - self.assertEqual(response.status_code, 201) - - def test_credentials_added(self): - """New authentication credentials added to existing object (200) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "new","sub": "new One"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_bad_request_response(self): - """Bad request (400) - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - data = {"Missing required fields"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 400) - - def test_object_already_exists_response(self): - """That object already exists for this account (409) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 409) diff --git a/tests/test_apis/test_auth_remove.py b/tests/test_apis/test_auth_remove.py deleted file mode 100644 index 150f13e..0000000 --- a/tests/test_apis/test_auth_remove.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python3 - -"""Remove Authentication -Tests for 'Remove authentication is successful.` (200), 'Authentication -failed.' (403), and 'That object does not exist for this account.' (404) -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase - -class AuthenticationRemovetestcase(APITestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_success_response(self): - """Remove authentication is successful. (200) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_bad_authentication(self): - """Authentication failed. 403 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data) - self.assertEqual(response.status_code, 403) - - def test_object_already_exists_response(self): - """That object does not exist for this account. 404 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "Reeya2","sub": "ReeyaGupta2"} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data) - self.assertEqual(response.status_code, 404) diff --git a/tests/test_apis/test_auth_reset_token.py b/tests/test_apis/test_auth_reset_token.py deleted file mode 100644 index 8ff77b2..0000000 --- a/tests/test_apis/test_auth_reset_token.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 - -"""Reset Token -Tests for 'Token reset is successful.' 200, and 'Bad request.', 400. -""" - -from django.test import TestCase, Client -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User - -class ResetTokenTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self) -> None: - self.client = APIClient() - - def test_reset_successful(self): - """Token reset is successful. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/reset_token/') - self.assertEqual(response.status_code, 200) - - def test_invalid_token(self): - """Invalid token. 403 - """ - - token = 'this-is-an-invalid-token' - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/reset_token/') - self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_objects_drafts_create.py b/tests/test_apis/test_biocompute/objects_drafts_create.py similarity index 100% rename from tests/test_apis/test_objects_drafts_create.py rename to tests/test_apis/test_biocompute/objects_drafts_create.py diff --git a/tests/test_apis/test_prefixes_create.py b/tests/test_apis/test_prefixes_create.py deleted file mode 100644 index 78ee601..0000000 --- a/tests/test_apis/test_prefixes_create.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python3 - -"""Bulk Create Prefixes -Tests for 'All prefixes were successfully created. 200', 'Some or all prefix -creations failed. 207', and 'Unauthorized. Authentication credentials were -not provided. 401' - -For the 207 response Each object submitted will have it's own response object -with it's own status code and message. These are as follows: - 201: The prefix * was successfully created. - 400: Bad Request. The expiration date * is not valid. - 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - 403: Forbidden. User does not have permission to perform this action. - 404: Not Found. The user * was not found on the server. - 409: Conflict. The prefix the requestor is attempting to create already exists. - """ - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase -from django.contrib.auth.models import Group - -class CreatePrefixeTestCase(APITestCase): - fixtures=['tests/fixtures/test_data'] - - def setUp(self): - - self.client= APIClient() - self.data = [{ - "prefix": "test1", - "description": "Test prefix description.", - "public": "true" - }, - { - "prefix": "test2", - "description": "Test prefix description.", - "public": "true" - }] - - self.legacy_data = { - "POST_api_prefixes_create": [ - { - "owner_group": "bco_publisher", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "testR" - } - ] - } - ] - } - - def test_create_prefix_success(self): - """The prefix was successfully created. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - legacy_response = self.client.post('/api/prefixes/create/', data=self.legacy_data, format='json') - response = self.client.post('/api/prefixes/create/', data=self.data, format='json') - self.assertEqual(legacy_response.status_code, 200) - self.assertEqual(response.status_code, 200) - - def test_create_multi_status(self): - """Tests for 'Some prefix creations failed. 207.' - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = { - "POST_api_prefixes_create": [ - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Invalid prefix naming.", - "expiration_date": "null", - "prefix": "invalid-prefix" - } - ] - }, - { - "owner_group": "does_not_exist", - "owner_user": "does_not_exist", - "prefixes": [ - { - "description": "Invalid owner.", - "prefix": "testR" - } - ] - }, - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "testR" - }, - - ] - }, - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "test" - } - ] - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/prefixes/create/', data=data, format='json') - # 201: The prefix * was successfully created. - self.assertEqual(response.data[2]['TESTR']['status_code'], 201) - - # 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) - - # 409: Conflict. The prefix the requestor is attempting to create already exists. - self.assertIn('prefix_name', response.data[3]['TEST']['data']) - - self.assertEqual(response.status_code, 207) - - def test_create_prefix_unauthorized(self): - """Unauthorized. Authentication credentials were not provided. 401 - """ - - data = { - "POST_api_prefixes_create": [ - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "testR" - } - ] - } - ] - } - - response = self.client.post('/api/prefixes/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_prefixes_modify.py b/tests/test_apis/test_prefixes_modify.py deleted file mode 100644 index 26c014b..0000000 --- a/tests/test_apis/test_prefixes_modify.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 - -"""Bulk Create Prefixes -Tests for 'All prefixes were successfully created. 200', 'Some prefix -modifications failed. 207', '400: All modifications failed', and 'Unauthorized. Authentication credentials were -not provided. 401' - -For the 207 response Each object submitted will have it's own response object -with it's own status code and message. These are as follows: - 201: The prefix * was successfully created. - 400: Bad Request. The expiration date * is not valid. - 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - 403: Forbidden. User does not have permission to perform this action. - 404: Not Found. The user * was not found on the server. - 409: Conflict. The prefix the requestor is attempting to create already exists. - """ - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase -from django.contrib.auth.models import Group - -class CreatePrefixeTestCase(APITestCase): - fixtures=['tests/fixtures/test_data'] - - def setUp(self): - - self.client= APIClient() - self.data = [{ - "prefix": "test", - "description": "Test prefix description." - }] - - self.legacy_data = { - "POST_api_prefixes_modify": [ - { - "owner_group": "bco_publisher", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test modification for prefix.", - "prefix": "Test" - } - ] - } - ] - } - - # def test_modify_prefix_success(self): - # """The prefix was successfully modified. 200 - # """ - - # token = Token.objects.get(user=User.objects.get(username='tester')).key - - # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - # legacy_response = self.client.post('/api/prefixes/modify/', data=self.legacy_data, format='json') - # response = self.client.post('/api/prefixes/modify/', data=self.data, format='json') - # self.assertEqual(legacy_response.status_code, 200) - # self.assertEqual(response.status_code, 200) - - # def test_modify_multi_status(self): - # """Tests for 'Some prefix modifications failed. 207.' - # """ - - # token = Token.objects.get(user=User.objects.get(username='tester')).key - # data = { - # "POST_api_prefixes_modify": [ - # { - # "owner_group": "test_drafter", - # "owner_user": "bco_api_user", - # "prefixes": [ - # { - # "description": "Invalid prefix naming.", - # "expiration_date": "null", - # "prefix": "invalid-prefix" - # } - # ] - # }, - # { - # "owner_group": "does_not_exist", - # "owner_user": "does_not_exist", - # "prefixes": [ - # { - # "description": "Invalid owner.", - # "prefix": "testR" - # } - # ] - # }, - # { - # "owner_group": "test_drafter", - # "owner_user": "bco_api_user", - # "prefixes": [ - # { - # "description": "Just a test prefix update.", - # "prefix": "test" - # }, - - # ] - # }, - # { - # "owner_group": "test_drafter", - # "owner_user": "bco_api_user", - # "prefixes": [ - # { - # "description": "Just a test prefix.", - # "prefix": "BCO" - # } - # ] - # } - # ] - # } - - # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - # response = self.client.post('/api/prefixes/modify/', data=data, format='json') - # # 201: The prefix * was successfully created. - # self.assertEqual(response.data[2]['TEST']['status_code'], 200) - - # # 400: Bad Request. The prefix * does not exist. - # self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) - # # 404: Not Found. The user * was not found on the server. - - # # 409: Conflict. The prefix the requestor is attempting to create already exists. - # self.assertIn('permissions', response.data[3]['BCO']['message']) - - # self.assertEqual(response.status_code, 207) - - # def test_create_prefix_unauthorized(self): - # """Unauthorized. Authentication credentials were not provided. 401 - # """ - - # data = { - # "POST_api_prefixes_create": [ - # { - # "owner_group": "test_drafter", - # "owner_user": "bco_api_user", - # "prefixes": [ - # { - # "description": "Just a test prefix.", - # "prefix": "testR" - # } - # ] - # } - # ] - # } - - # response = self.client.post('/api/prefixes/create/', data=data, format='json') - # self.assertEqual(response.status_code, 403)