diff --git a/biocompute/migrations/0001_initial.py b/biocompute/migrations/0001_initial.py index 68fdd1e2..47ed11b1 100644 --- a/biocompute/migrations/0001_initial.py +++ b/biocompute/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.13 on 2024-03-20 18:48 +# Generated by Django 3.2.13 on 2024-04-02 20:08 from django.conf import settings from django.db import migrations, models @@ -10,9 +10,8 @@ class Migration(migrations.Migration): initial = True dependencies = [ - ('prefix', '0001_initial'), - ('auth', '0012_alter_user_first_name_max_length'), migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('prefix', '0001_initial'), ] operations = [ @@ -24,9 +23,8 @@ class Migration(migrations.Migration): ('state', models.CharField(choices=[('REFERENCED', 'referenced'), ('PUBLISHED', 'published'), ('DRAFT', 'draft'), ('DELETE', 'delete')], default='DRAFT', max_length=20)), ('last_update', models.DateTimeField()), ('access_count', models.IntegerField(default=0)), - ('authorized_groups', models.ManyToManyField(blank=True, to='auth.Group')), ('authorized_users', models.ManyToManyField(blank=True, related_name='authorized_bcos', to=settings.AUTH_USER_MODEL)), - ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_bcos', to=settings.AUTH_USER_MODEL)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_bcos', to=settings.AUTH_USER_MODEL, to_field='username')), ('prefix', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='prefix.prefix')), ], ), diff --git a/docs/refactor.md b/docs/refactor.md index 9c201091..e5454202 100644 --- a/docs/refactor.md +++ b/docs/refactor.md @@ -52,4 +52,13 @@ Prefix Perms: delete -> Delete Draft publish -> Publish Draft view -> View/download - ONLY if private \ No newline at end of file + ONLY if private + + If prefix is public anyone can view, but only auth users can modify. 
+ + Things to look for when reviewing code: + - variable names are consistent and make sense + - all functions have documentation. This should include: + - descriptions + - explicit parameters/inputs and outputs/returns + - hoverover should display function documentation diff --git a/prefix/apis.py b/prefix/apis.py index ac27f6fb..406d0bb3 100644 --- a/prefix/apis.py +++ b/prefix/apis.py @@ -124,6 +124,8 @@ def post(self, request) -> Response: data = request.data rejected_requests = False accepted_requests = False + if 'POST_api_prefixes_create' in request.data: + data = legacy_api_converter(request.data) if data[0]['prefix']=='test' and data[0]['public'] is True: return Response( @@ -132,9 +134,6 @@ def post(self, request) -> Response: 'TEST',"SUCCESS",201,"Prefix TEST created" ) ) - - if 'POST_api_prefixes_create' in request.data: - data = legacy_api_converter(request.data) for index, object in enumerate(data): response_id = object.get("prefix", index).upper() diff --git a/prefix/migrations/0001_initial.py b/prefix/migrations/0001_initial.py index bb609b85..bda7b305 100644 --- a/prefix/migrations/0001_initial.py +++ b/prefix/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.13 on 2024-03-20 18:48 +# Generated by Django 3.2.13 on 2024-04-02 20:08 from django.conf import settings from django.db import migrations, models @@ -12,7 +12,6 @@ class Migration(migrations.Migration): dependencies = [ migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ('auth', '0012_alter_user_first_name_max_length'), ] operations = [ @@ -24,7 +23,7 @@ class Migration(migrations.Migration): ('created', models.DateTimeField(blank=True, default=django.utils.timezone.now, null=True)), ('description', models.TextField(blank=True, null=True)), ('counter', models.IntegerField(default=0, help_text='Counter for object_id asignment')), - ('authorized_groups', models.ManyToManyField(blank=True, related_name='authorized_prefix', to='auth.Group')), + ('public', 
models.BooleanField(default=True, help_text='Boolean field to indicate if there are restrictions on the use of this prefix')), ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, to_field='username')), ], ), diff --git a/prefix/services.py b/prefix/services.py index f5a806fc..91682c55 100644 --- a/prefix/services.py +++ b/prefix/services.py @@ -105,7 +105,12 @@ def create(self, validated_data): """ validated_data.pop('user_permissions') - public = validated_data['public'] + + try: + public = validated_data['public'] + except KeyError: + public, validated_data['public'] = True, True + prefix_instance = Prefix.objects.create(**validated_data, created=timezone.now()) if public is False: diff --git a/tests/fixtures/bco_dump.json b/tests/fixtures/bco_dump.json new file mode 100644 index 00000000..16722d66 --- /dev/null +++ b/tests/fixtures/bco_dump.json @@ -0,0 +1,5216 @@ +[ + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using 
mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, + { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + 
"Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": 
"http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. 
One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. 
Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": 
"https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google 
Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": 
"tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": 
[ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." 
+ ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], 
+ "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": 
"2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + 
"step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": 
"<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": 
{ + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": 
"George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
+ }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": 
"Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google 
Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": 
"IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 13, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 14, + "fields": { + "contents": { + "object_id": "", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"da75a2c36dd6bf449d1f7b150197096e11c51812", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2023-09-05T18:10:23", + "modified": "2023-09-05T18:10:23.167Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "DRAFT", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2023-09-24T09:16:04.123Z" + } + } +] \ No newline at end of file diff --git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index 2bc23342..c3ec2556 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -478,7 +478,7 @@ }, { "model": "auth.permission", - "pk": 54, + "pk": 67, "fields": { "name": "Can add BCOs with prefix NOPUB", "content_type": 13, @@ -487,7 +487,7 @@ }, { "model": "auth.permission", - "pk": 55, + "pk": 68, "fields": { "name": "Can change BCOs with prefix NOPUB", "content_type": 13, @@ -496,7 +496,7 @@ }, { "model": "auth.permission", - "pk": 56, + "pk": 69, "fields": { "name": "Can delete BCOs with prefix NOPUB", "content_type": 13, @@ -505,7 +505,7 @@ }, { "model": "auth.permission", - "pk": 57, + "pk": 70, "fields": { "name": "Can publish BCOs with prefix NOPUB", "content_type": 13, @@ -514,7 +514,7 @@ }, { "model": "auth.permission", - "pk": 58, + "pk": 71, "fields": { "name": "Can add new user", "content_type": 11, @@ -523,7 +523,7 @@ }, { "model": "auth.permission", - "pk": 59, + "pk": 72, "fields": { "name": "Can change new user", "content_type": 11, @@ 
-532,7 +532,7 @@ }, { "model": "auth.permission", - "pk": 60, + "pk": 73, "fields": { "name": "Can delete new user", "content_type": 11, @@ -757,110 +757,6 @@ ] } }, - { - "model": "contenttypes.contenttype", - "pk": 1, - "fields": { - "app_label": "admin", - "model": "logentry" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 2, - "fields": { - "app_label": "auth", - "model": "permission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 3, - "fields": { - "app_label": "auth", - "model": "group" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 4, - "fields": { - "app_label": "auth", - "model": "user" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 5, - "fields": { - "app_label": "contenttypes", - "model": "contenttype" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 6, - "fields": { - "app_label": "sessions", - "model": "session" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 7, - "fields": { - "app_label": "authtoken", - "model": "token" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 8, - "fields": { - "app_label": "authtoken", - "model": "tokenproxy" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 9, - "fields": { - "app_label": "blacklist", - "model": "blacklistedtoken" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 10, - "fields": { - "app_label": "authentication", - "model": "authentication" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 11, - "fields": { - "app_label": "authentication", - "model": "newuser" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 12, - "fields": { - "app_label": "biocompute", - "model": "bco" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 13, - "fields": { - "app_label": "prefix", - "model": "prefix" - } - }, { "model": "authtoken.token", "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", diff --git a/tests/test_apis/test_api_authentication/test_auth_add.py 
b/tests/test_apis/test_api_authentication/test_auth_add.py new file mode 100644 index 00000000..a7843b3b --- /dev/null +++ b/tests/test_apis/test_api_authentication/test_auth_add.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 + +"""Add Authentication +Tests for 'New authentication credentials added to existing object' (200), +'Authentication credentials were created and added' (201), 'Bad request' (400), +'That object already exists for this account' (409) +""" + +from django.test import TestCase, Client +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from authentication.models import Authentication + +class AuthenticationTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self): + self.client = APIClient() + + def test_credentials_created_response(self): + """Add authentication is successful (200) + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data) + self.assertEqual(response.status_code, 201) + + def test_credentials_added(self): + """New authentication credentials added to existing object (200) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "new","sub": "new One"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + self.assertEqual(response.status_code, 200) + + def test_bad_request_response(self): + """Bad request (400) + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + data = {"Missing required fields"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + 
self.assertEqual(response.status_code, 400) + + def test_object_already_exists_response(self): + """That object already exists for this account (409) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/add/', data=data, format='json') + self.assertEqual(response.status_code, 409) diff --git a/tests/test_apis/test_api_authentication/test_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_auth_reset_token.py new file mode 100644 index 00000000..8ff77b20 --- /dev/null +++ b/tests/test_apis/test_api_authentication/test_auth_reset_token.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 + +"""Reset Token +Tests for 'Token reset is successful.' 200, and 'Bad request.', 400. +""" + +from django.test import TestCase, Client +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User + +class ResetTokenTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self) -> None: + self.client = APIClient() + + def test_reset_successful(self): + """Token reset is successful. 200 + """ + + token = Token.objects.get(user=User.objects.get(username='tester')).key + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/reset_token/') + self.assertEqual(response.status_code, 200) + + def test_invalid_token(self): + """Invalid token. 
403 + """ + + token = 'this-is-an-invalid-token' + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/reset_token/') + self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_api_authentication/testi_auth_remove.py b/tests/test_apis/test_api_authentication/testi_auth_remove.py new file mode 100644 index 00000000..150f13e5 --- /dev/null +++ b/tests/test_apis/test_api_authentication/testi_auth_remove.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +"""Remove Authentication +Tests for 'Remove authentication is successful.` (200), 'Authentication +failed.' (403), and 'That object does not exist for this account.' (404) +""" + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase + +class AuthenticationRemovetestcase(APITestCase): + fixtures = ['tests/fixtures/test_data'] + + def setUp(self): + self.client = APIClient() + + def test_success_response(self): + """Remove authentication is successful. (200) + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + + data = {"iss": "Reeya1","sub": "ReeyaGupta1"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data, format='json') + self.assertEqual(response.status_code, 200) + + def test_bad_authentication(self): + """Authentication failed. 403 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {} + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data) + self.assertEqual(response.status_code, 403) + + def test_object_already_exists_response(self): + """That object does not exist for this account. 
404 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = {"iss": "Reeya2","sub": "ReeyaGupta2"} + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/auth/remove/', data=data) + self.assertEqual(response.status_code, 404) diff --git a/tests/test_apis/test_api_prefix/__init__.py b/tests/test_apis/test_api_prefix/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_apis/test_api_prefix/test_prefixes_create.py b/tests/test_apis/test_api_prefix/test_prefixes_create.py new file mode 100644 index 00000000..1c60b030 --- /dev/null +++ b/tests/test_apis/test_api_prefix/test_prefixes_create.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 + +"""Bulk Create Prefixes +Tests for 'All prefixes were successfully created. 200', 'Some or all prefix +creations failed. 207', and 'Unauthorized. Authentication credentials were +not provided. 401' + +For the 207 response Each object submitted will have it's own response object +with it's own status code and message. These are as follows: + 201: The prefix * was successfully created. + 400: Bad Request. The expiration date * is not valid. + 400: Bad Request. The prefix * does not follow the naming rules for a prefix. + 403: Forbidden. User does not have permission to perform this action. + 404: Not Found. The user * was not found on the server. + 409: Conflict. The prefix the requestor is attempting to create already exists. 
+ """ + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase +from django.contrib.auth.models import Group + +class CreatePrefixeTestCase(APITestCase): + fixtures=['tests/fixtures/test_data'] + + def setUp(self): + + self.client= APIClient() + self.data = [{ + "prefix": "test1", + "description": "Test prefix description.", + "public": "true" + }, + { + "prefix": "test2", + "description": "Test prefix description.", + "public": "true" + }] + + self.legacy_data = { + "POST_api_prefixes_create": [ + { + "owner_group": "bco_publisher", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test prefix.", + "prefix": "testR" + } + ] + } + ] + } + + def test_create_prefix_success(self): + """The prefix was successfully created. 200 + """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + legacy_response = self.client.post('/api/prefixes/create/', data=self.legacy_data, format='json') + response = self.client.post('/api/prefixes/create/', data=self.data, format='json') + self.assertEqual(legacy_response.status_code, 201) + self.assertEqual(response.status_code, 201) + + def test_create_multi_status(self): + """Tests for 'Some prefix creations failed. 207.' 
+ """ + + token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key + data = { + "POST_api_prefixes_create": [ + { + "owner_group": "test_drafter", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Invalid prefix naming.", + "expiration_date": "null", + "prefix": "invalid-prefix" + } + ] + }, + { + "owner_group": "does_not_exist", + "owner_user": "does_not_exist", + "prefixes": [ + { + "description": "Invalid owner.", + "prefix": "testR" + } + ] + }, + { + "owner_group": "test_drafter", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test prefix.", + "prefix": "test2" + }, + + ] + }, + { + "owner_group": "test_drafter", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test prefix.", + "prefix": "test" + } + ] + } + ] + } + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + response = self.client.post('/api/prefixes/create/', data=data, format='json') + # 201: The prefix * was successfully created. + self.assertEqual(response.data[2]['TEST2']['status_code'], 201) + + # 400: Bad Request. The prefix * does not follow the naming rules for a prefix. + self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) + + # 409: Conflict. The prefix the requestor is attempting to create already exists. + self.assertIn('prefix_name', response.data[3]['TEST']['data']) + + self.assertEqual(response.status_code, 207) + + def test_create_prefix_unauthorized(self): + """Unauthorized. Authentication credentials were not provided. 
401 + """ + + data = { + "POST_api_prefixes_create": [ + { + "owner_group": "test_drafter", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test prefix.", + "prefix": "testR" + } + ] + } + ] + } + + response = self.client.post('/api/prefixes/create/', data=data, format='json') + self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_api_prefix/test_prefixes_modify.py b/tests/test_apis/test_api_prefix/test_prefixes_modify.py new file mode 100644 index 00000000..26c014b9 --- /dev/null +++ b/tests/test_apis/test_api_prefix/test_prefixes_modify.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +"""Bulk Modify Prefixes +Tests for 'All prefixes were successfully created. 200', 'Some prefix +modifications failed. 207', '400: All modifications failed', and 'Unauthorized. Authentication credentials were +not provided. 401' + +For the 207 response, each object submitted will have its own response object +with its own status code and message. These are as follows: + 201: The prefix * was successfully created. + 400: Bad Request. The expiration date * is not valid. + 400: Bad Request. The prefix * does not follow the naming rules for a prefix. + 403: Forbidden. User does not have permission to perform this action. + 404: Not Found. The user * was not found on the server. + 409: Conflict. The prefix the requestor is attempting to create already exists. + """ + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase +from django.contrib.auth.models import Group + +class CreatePrefixeTestCase(APITestCase): + fixtures=['tests/fixtures/test_data'] + + def setUp(self): + + self.client= APIClient() + self.data = [{ + "prefix": "test", + "description": "Test prefix description." 
+ }] + + self.legacy_data = { + "POST_api_prefixes_modify": [ + { + "owner_group": "bco_publisher", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test modification for prefix.", + "prefix": "Test" + } + ] + } + ] + } + + # def test_modify_prefix_success(self): + # """The prefix was successfully modified. 200 + # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # legacy_response = self.client.post('/api/prefixes/modify/', data=self.legacy_data, format='json') + # response = self.client.post('/api/prefixes/modify/', data=self.data, format='json') + # self.assertEqual(legacy_response.status_code, 200) + # self.assertEqual(response.status_code, 200) + + # def test_modify_multi_status(self): + # """Tests for 'Some prefix modifications failed. 207.' + # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + # data = { + # "POST_api_prefixes_modify": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Invalid prefix naming.", + # "expiration_date": "null", + # "prefix": "invalid-prefix" + # } + # ] + # }, + # { + # "owner_group": "does_not_exist", + # "owner_user": "does_not_exist", + # "prefixes": [ + # { + # "description": "Invalid owner.", + # "prefix": "testR" + # } + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix update.", + # "prefix": "test" + # }, + + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "BCO" + # } + # ] + # } + # ] + # } + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # response = self.client.post('/api/prefixes/modify/', data=data, format='json') + # # 201: The prefix * was successfully 
created. + # self.assertEqual(response.data[2]['TEST']['status_code'], 200) + + # # 400: Bad Request. The prefix * does not exist. + # self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) + # # 404: Not Found. The user * was not found on the server. + + # # 409: Conflict. The prefix the requestor is attempting to create already exists. + # self.assertIn('permissions', response.data[3]['BCO']['message']) + + # self.assertEqual(response.status_code, 207) + + # def test_create_prefix_unauthorized(self): + # """Unauthorized. Authentication credentials were not provided. 401 + # """ + + # data = { + # "POST_api_prefixes_create": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "testR" + # } + # ] + # } + # ] + # } + + # response = self.client.post('/api/prefixes/create/', data=data, format='json') + # self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_biocompute/__init__.py b/tests/test_apis/test_biocompute/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_apis/test_biocompute/test_objects_drafts_create.py b/tests/test_apis/test_biocompute/test_objects_drafts_create.py new file mode 100644 index 00000000..68b15fc1 --- /dev/null +++ b/tests/test_apis/test_biocompute/test_objects_drafts_create.py @@ -0,0 +1,135 @@ + +#!/usr/bin/env python3 + +"""Objects/Drafts_create +Tests for 'Creation of BCO draft is successful.' 
(200), +returns 207, 403 (needs to be reviewed) +""" + + +import json +from django.test import TestCase +from django.contrib.auth.models import User +from rest_framework.authtoken.models import Token +from rest_framework.test import APIClient + +class BcoDraftCreateTestCase(TestCase): + fixtures = ['tests/fixtures/test_data'] + def setUp(self): + self.client = APIClient() + + self.token = Token.objects.get(user=User.objects.get(username="tester")) + + self.legacy_data = { + "POST_api_objects_draft_create": [ + { + "prefix": "BCO", + "owner_group": "tester", + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "schema": "IEEE", + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + } + ] + } + + self.data = [ + { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "prefix": "BCO", + "authorized_users": ["hivelab"], + "contents": { + "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + }, + { + "object_id": "http://127.0.0.1:8000/TEST_000001", + "prefix": "TEST", + "contents": { + "object_id": "https://biocomputeobject.org/TEST_000001", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + } + ] + + def test_legacy_successful_creation(self): + """200: Creation of BCO drafts is successful. 
+ """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', self.legacy_data, format='json') + self.assertEqual(response.status_code, 200) + + def test_successful_creation(self): + """200: Creation of BCO drafts is successful. + """ + + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', self.data, format='json') + self.assertEqual(response.status_code, 200) + + def test_partial_failure(self): + # Test case for partial failure (response code 300) + # Returns 207 (Multi-Status) instead of 300 (partial failure) + data = { + 'POST_api_objects_draft_create': [ + { + 'prefix': 'BCO', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': {} + }, + { + 'prefix': 'Reeyaa', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': {} + } + ] + } + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 207) + + def test_bad_request(self): + # Test case for bad request (response code 400) + # Gives 403 forbidden request instead of 400 + data = [ + { + "object_id": "http://127.0.0.1:8000/TEST_000001", + "prefix": "TEST", + "contents": { + "object_id": "https://biocomputeobject.org/TEST_000001", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" + } + } + ] + self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 400) + + def test_invalid_token(self): + # Test case for invalid token (response code 403) + # Setting authentication token to an invalid value + + data = { + 'POST_api_objects_draft_create': [ + { + 'prefix': 
'BCO', + 'owner_group': 'bco_drafter', + 'schema': 'IEEE', + 'contents': {} + }, + + ] + } + self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') + response = self.client.post('/api/objects/drafts/create/', data=data, format='json') + self.assertEqual(response.status_code, 403)