From 30ba550a52772f3194a53418d17ccd0f0cb48c45 Mon Sep 17 00:00:00 2001 From: hadleyking Date: Wed, 3 Apr 2024 09:25:57 -0400 Subject: [PATCH 1/3] Add `DraftRetrieveApi` To do this functions related to Prefixes and the associated permissions were created Changes to be committed: modified: biocompute/apis.py modified: biocompute/models.py modified: biocompute/selectors.py modified: biocompute/services.py modified: biocompute/urls.py modified: config/asgi.py modified: config/fixtures/local_data.json modified: config/services.py modified: config/urls.py modified: prefix/selectors.py modified: prefix/services.py deleted: test.json modified: tests/fixtures/example_bco.py modified: tests/fixtures/test_data.json deleted: token.json --- biocompute/apis.py | 102 +- biocompute/models.py | 3 - biocompute/selectors.py | 44 + biocompute/services.py | 112 +- biocompute/urls.py | 3 +- config/asgi.py | 2 +- config/fixtures/local_data.json | 2503 ++++++++++++++++++++++++++++-- config/services.py | 24 +- config/urls.py | 2 + prefix/selectors.py | 69 +- prefix/services.py | 76 +- test.json | 862 ----------- tests/fixtures/example_bco.py | 7 +- tests/fixtures/test_data.json | 2515 +++++++++++++++++++++++++++++-- token.json | 1 - 15 files changed, 5085 insertions(+), 1240 deletions(-) delete mode 100644 test.json delete mode 100644 token.json diff --git a/biocompute/apis.py b/biocompute/apis.py index 9aae450e..226c0415 100644 --- a/biocompute/apis.py +++ b/biocompute/apis.py @@ -6,6 +6,7 @@ from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema +from django.conf import settings from django.db import utils from rest_framework.views import APIView from rest_framework import status @@ -14,6 +15,11 @@ from tests.fixtures.example_bco import BCO_000001 from config.services import legacy_api_converter, response_constructor from biocompute.services import BcoDraftSerializer +from biocompute.selectors import retrieve_bco +from prefix.selectors import user_can_draft + + 
+hostname = settings.PUBLIC_HOSTNAME BCO_DRAFT_SCHEMA = openapi.Schema( type=openapi.TYPE_ARRAY, @@ -25,22 +31,17 @@ "object_id": openapi.Schema( type=openapi.TYPE_STRING, description="BCO Object ID.", - example="https://biocomputeobject.org/TEST_000001" + example=f"{hostname}/TEST_000001/DRAFT" ), "prefix": openapi.Schema( type=openapi.TYPE_STRING, description="BCO Prefix to use", - example="BCO" + example="TEST" ), "authorized_users": openapi.Schema( type=openapi.TYPE_ARRAY, description="Users which can access the BCO draft.", - items=openapi.Schema(type=openapi.TYPE_STRING, example="None") - ), - "authorized_groups": openapi.Schema( - type=openapi.TYPE_ARRAY, - description="Group which can access the BCO draft.", - items=openapi.Schema(type=openapi.TYPE_STRING, example="None") + items=openapi.Schema(type=openapi.TYPE_STRING, example="tester") ), "contents": openapi.Schema( type=openapi.TYPE_OBJECT, @@ -53,14 +54,19 @@ ) class DraftsCreateApi(APIView): - """ - Create BCO Draft [Bulk Enabled] + """Create BCO Draft [Bulk Enabled] - -------------------- + API endpoint for creating new BioCompute Object (BCO) drafts, with support + for bulk operations. - Creates a new BCO draft object. + This endpoint allows authenticated users to create new BCO drafts + individually or in bulk by submitting a list of BCO drafts. The operation + can be performed for one or more drafts in a single request. Each draft is + validated and processed independently, allowing for mixed response + statuses (HTTP_207_MULTI_STATUS) in the case of bulk submissions. 
""" - + + permission_classes = [IsAuthenticated,] request_body = BCO_DRAFT_SCHEMA @swagger_auto_schema( @@ -87,6 +93,30 @@ def post(self, request) -> Response: for index, object in enumerate(data): response_id = object.get("object_id", index) + bco_prefix = object.get("prefix", index) + prefix_permitted = user_can_draft(owner, bco_prefix) + + if prefix_permitted is None: + response_data.append(response_constructor( + identifier=response_id, + status = "NOT FOUND", + code= 404, + message= f"Invalid prefix: {bco_prefix}.", + )) + rejected_requests = True + continue + + if prefix_permitted is False: + response_data.append(response_constructor( + identifier=response_id, + status = "FORBIDDEN", + code= 400, + message= f"User, {owner}, does not have draft permissions"\ + + " for prefix {bco_prefix}.", + )) + rejected_requests = True + continue + bco = BcoDraftSerializer(data=object, context={'request': request}) if bco.is_valid(): @@ -135,3 +165,49 @@ def post(self, request) -> Response: status=status.HTTP_200_OK, data=response_data ) + +class DraftRetrieveApi(APIView): + """Get a draft object + + API View to Retrieve a Draft Object + + This view allows authenticated users to retrieve the contents of a specific draft object + identified by its BioCompute Object (BCO) accession number. The operation ensures that + only users with appropriate permissions can access the draft contents. + + Parameters: + - bco_accession (str): A string parameter passed in the URL path that uniquely identifies + the draft object to be retrieved. + """ + + @swagger_auto_schema( + manual_parameters=[ + openapi.Parameter( + "bco_accession", + openapi.IN_PATH, + description="Object ID to be viewed.", + type=openapi.TYPE_STRING, + default="BCO_000000" + ) + ], + responses={ + 200: "Success. Object contents returned", + 401: "Authentication credentials were not provided, or" + " the token was invalid.", + 403: "Forbidden. The requestor does not have appropriate permissions.", + 404: "Not found. 
That draft could not be found on the server." + }, + tags=["BCO Management"], + ) + + def get(self, request, bco_accession): + requester = request.user + print(requester) + bco_instance = retrieve_bco(bco_accession, requester) + if bco_instance is False: + return Response( + status=status.HTTP_403_FORBIDDEN, + data={"message": f"User, {requester}, does not have draft permissions"\ + + f" for {bco_accession}."}) + else: + return Response(status=status.HTTP_200_OK, data=bco_instance.contents) \ No newline at end of file diff --git a/biocompute/models.py b/biocompute/models.py index a74ed14a..49c6284d 100644 --- a/biocompute/models.py +++ b/biocompute/models.py @@ -31,8 +31,6 @@ class Bco(models.Model): String representing the django.contrib.auth.models.User that 'owns' the object authorized_users: ManyToManyField(User) String representing the User that has access to the object - authorized_group: ManyToManyField(Group) - String representing the Group that has access to the object prefix: str Prefix for the BCO state:str @@ -58,7 +56,6 @@ class Bco(models.Model): related_name="authorized_bcos", blank=True ) - authorized_groups = models.ManyToManyField(Group,blank=True) state = models.CharField(max_length=20, choices=STATE_CHOICES, default="DRAFT") last_update = models.DateTimeField() access_count = models.IntegerField(default=0) diff --git a/biocompute/selectors.py b/biocompute/selectors.py index e69de29b..4c319553 100644 --- a/biocompute/selectors.py +++ b/biocompute/selectors.py @@ -0,0 +1,44 @@ +# biocompute/selectors.py + +"""BioCompute Selectors + +Functions to query the database related to BioCompute Objects +""" + +from django.conf import settings +from django.contrib.auth. models import User +from biocompute.models import Bco +from prefix.selectors import user_can_view + +def retrieve_bco(bco_accession: str, user: User) -> bool: + """Retrieve BCO + + Determines if a user can view a specific BioCompute Object (BCO). 
+ + This function checks whether a given user has the permission to view a BCO + identified by its accession number. It performs several checks: + + 1. Verifies if the BCO exists. If not, returns `None`. + 2. Checks if the user is explicitly authorized to view this specific BCO. + 3. If not directly authorized, it then checks if the user has general 'view' permissions + for the prefix associated with the BCO. + + """ + + hostname = settings.PUBLIC_HOSTNAME + object_id = f"{hostname}/{bco_accession}/DRAFT" + prefix_name = bco_accession.split("_")[0] + + try: + bco_instance = Bco.objects.get(object_id=object_id) + except Bco.DoesNotExist: + return None + + if user in bco_instance.authorized_users.all(): + return bco_instance + + view_permission = user_can_view(prefix_name, user) + if view_permission is False: + return False + + return bco_instance \ No newline at end of file diff --git a/biocompute/services.py b/biocompute/services.py index 5c4dcd83..028530db 100644 --- a/biocompute/services.py +++ b/biocompute/services.py @@ -1,15 +1,13 @@ #!/usr/bin/env python3 # biocopmute/services.py -import re -from urllib.parse import urlparse from django.conf import settings from django.db import transaction from django.utils import timezone from biocompute.models import Bco from prefix.models import Prefix from prefix.services import prefix_counter_increment -from django.contrib.auth.models import Group, User +from django.contrib.auth.models import User from rest_framework import serializers """BioCompute Services @@ -20,46 +18,71 @@ HOSTNAME = settings.PUBLIC_HOSTNAME class BcoDraftSerializer(serializers.Serializer): + """Serializer for drafting BioCompute Objects (BCO). + + This serializer is used to validate and serialize data related to the + creation or update of BCO drafts. 
It handles the initial data validation + including the existence of users specified as authorized users, the + validity of the prefix, and the construction or validation of the object_id + if provided. + + Attributes: + - object_id (URLField, optional): + The unique identifier of the BCO, which should be a URL. This field is + not required for creation as it can be generated. + - contents (JSONField): + The contents of the BCO in JSON format. + - prefix (CharField): + A short alphanumeric prefix related to the BCO. Defaults to 'BCO'. + - authorized_users (ListField): + A list of usernames authorized to access the BCO, besides the owner. + + Methods: + - validate: Validates the incoming data for creating or updating a BCO draft. + - create: Creates a new BCO instance based on the validated data. + """ + object_id = serializers.URLField(required=False) contents = serializers.JSONField() prefix = serializers.CharField(max_length=5, min_length=3, default="BCO") - authorized_groups = serializers.ListField(child=serializers.CharField(), required=False) authorized_users = serializers.ListField(child=serializers.CharField(), required=False) def validate(self, attrs): """BCO Draft Validator + + Validates the presence and correctness of 'authorized_users' and + 'prefix'. If 'object_id' is provided, it validates the format and + uniqueness of it. Adds the request's user as the owner of the BCO. + + Parameters: + - attrs (dict): The incoming data to be validated. + + Returns: + - dict: The validated data with additional fields such as 'owner' and + potentially modified 'prefix'. + + Raises: + - serializers.ValidationError: If any validation checks fail. 
""" errors = {} request = self.context.get('request') attrs["owner"] = request.user - #check for groups - if 'authorized_groups' in attrs: - for group in attrs['authorized_groups']: - try: - Group.objects.get(name=group) - except Exception as err: - errors['authorized_groups'] = f"Invalid group: {group}" - # check for users if 'authorized_users' in attrs: for user in attrs['authorized_users']: try: - # import pdb; pdb.set_trace() User.objects.get(username=user) except Exception as err: errors['authorized_users'] =f"Invalid user: {user}" - # Validate Prefix try: - #set a name and instance for Prefix attrs['prefix'] = Prefix.objects.get(prefix=attrs['prefix']) attrs['prefix_name'] = attrs['prefix'].prefix except Prefix.DoesNotExist as err: errors['prefix'] = 'Invalid prefix.' raise serializers.ValidationError(errors) - # Validate or create object_id if 'object_id' in attrs: id_errors = validate_bco_object_id( attrs['object_id'], @@ -67,11 +90,7 @@ def validate(self, attrs): ) if id_errors != 0: errors["object_id"] = id_errors - else: - attrs['object_id'] = create_bco_id(attrs['prefix']) - # If erros exist than raise and exception and return it, otherwise - # return validated data if errors: raise serializers.ValidationError(errors) @@ -79,19 +98,33 @@ def validate(self, attrs): @transaction.atomic def create(self, validated_data): - # Remove the non-model field 'prefix_name' and use 'prefix' instance instead + """Creates a new BCO instance based on the validated data. + + If 'object_id' is not provided in the validated data, it generates one. + It also handles the creation of the BCO instance and setting up the + many-to-many relationships for 'authorized_users'. + + Parameters: + - validated_data (dict): The validated data used to create the BCO. + + Returns: + - Bco: The newly created Bco instance. 
+ """ + validated_data.pop('prefix_name') - authorized_group_names = validated_data.pop('authorized_groups', []) authorized_usernames = validated_data.pop('authorized_users', []) - bco_instance = Bco.objects.create(**validated_data, last_update=timezone.now()) - - # Set ManyToMany relations - if authorized_group_names: - authorized_groups = Group.objects.filter(name__in=authorized_group_names) - bco_instance.authorized_groups.set(authorized_groups) + if 'object_id' not in validated_data: + validated_data['object_id'] = create_bco_id( + validated_data['prefix'] + ) + bco_instance = Bco.objects.create( + **validated_data, last_update=timezone.now() + ) if authorized_usernames: - authorized_users = User.objects.filter(username__in=authorized_usernames) + authorized_users = User.objects.filter( + username__in=authorized_usernames + ) bco_instance.authorized_users.set(authorized_users) return bco_instance @@ -100,7 +133,7 @@ def create(self, validated_data): def validate_bco_object_id(object_id: str, prefix_name: str): """Validate BCO object ID - Function to validate a proposed BCO object_id. Will reject the ID if the + Function to validate a proposed BCO object_id. Will reject the ID if the following constraints are not met: 1. Correct hostname for this BCODB instance 2. Prefix submitted is not in the object_id @@ -124,15 +157,22 @@ def validate_bco_object_id(object_id: str, prefix_name: str): return errors return 0 -def create_bco_id(prefix: Prefix) -> str: +def create_bco_id(prefix_instance: Prefix) -> str: """Create BCO object_id - Function to construct BCO object_id. Takes a Prefix model instance and - returns a bco.object_id. + Constructs a BCO object_id using a Prefix model instance. + Ensures uniqueness by incrementing the prefix's counter until a unique ID + is found. 
""" - count = prefix_counter_increment(prefix) - bco_identifier = format(count, "06d") - bco_id = f"{HOSTNAME}/{prefix}_{bco_identifier}/DRAFT" + unique_id_found = False + + while not unique_id_found: + count = prefix_counter_increment(prefix_instance) + bco_identifier = format(count, "06d") + bco_id = f"{HOSTNAME}/{prefix_instance.prefix}_{bco_identifier}/DRAFT" + + if not Bco.objects.filter(object_id=bco_id).exists(): + unique_id_found = True return bco_id diff --git a/biocompute/urls.py b/biocompute/urls.py index 1c8230e8..f7249d83 100644 --- a/biocompute/urls.py +++ b/biocompute/urls.py @@ -8,5 +8,6 @@ ) urlpatterns = [ - path("objects/drafts/create/", DraftsCreateApi.as_view()) + path("objects/drafts/create/", DraftsCreateApi.as_view()), + ] \ No newline at end of file diff --git a/config/asgi.py b/config/asgi.py index ee832654..69684188 100755 --- a/config/asgi.py +++ b/config/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "bco_api.settings") +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "config.settings") application = get_asgi_application() diff --git a/config/fixtures/local_data.json b/config/fixtures/local_data.json index b202c17a..2bc23342 100644 --- a/config/fixtures/local_data.json +++ b/config/fixtures/local_data.json @@ -1,20 +1,4 @@ [ - { - "model": "auth.group", - "pk": 1, - "fields": { - "name": "bco_publisher", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 2, - "fields": { - "name": "bco_drafter", - "permissions": [] - } - }, { "model": "auth.permission", "pk": 1, @@ -486,72 +470,189 @@ { "model": "auth.permission", "pk": 53, + "fields": { + "name": "Can view BCOs with prefix NOPUB", + "content_type": 13, + "codename": "view_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can add BCOs with prefix NOPUB", + "content_type": 13, + "codename": "add_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 55, + 
"fields": { + "name": "Can change BCOs with prefix NOPUB", + "content_type": 13, + "codename": "change_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can delete BCOs with prefix NOPUB", + "content_type": 13, + "codename": "delete_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can publish BCOs with prefix NOPUB", + "content_type": 13, + "codename": "publish_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can add new user", + "content_type": 11, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can change new user", + "content_type": 11, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can delete new user", + "content_type": 11, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", + "pk": 61, + "fields": { + "name": "Can view new user", + "content_type": 11, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can add authentication", + "content_type": 10, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can change authentication", + "content_type": 10, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can delete authentication", + "content_type": 10, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can view authentication", + "content_type": 10, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 66, "fields": { "name": "Can add bco", - "content_type": 14, + "content_type": 12, "codename": "add_bco" } }, { "model": "auth.permission", - "pk": 54, + "pk": 67, "fields": { "name": "Can change bco", - "content_type": 14, + 
"content_type": 12, "codename": "change_bco" } }, { "model": "auth.permission", - "pk": 55, + "pk": 68, "fields": { "name": "Can delete bco", - "content_type": 14, + "content_type": 12, "codename": "delete_bco" } }, { "model": "auth.permission", - "pk": 56, + "pk": 69, "fields": { "name": "Can view bco", - "content_type": 14, + "content_type": 12, "codename": "view_bco" } }, { "model": "auth.permission", - "pk": 57, + "pk": 70, "fields": { "name": "Can add prefix", - "content_type": 15, + "content_type": 13, "codename": "add_prefix" } }, { "model": "auth.permission", - "pk": 58, + "pk": 71, "fields": { "name": "Can change prefix", - "content_type": 15, + "content_type": 13, "codename": "change_prefix" } }, { "model": "auth.permission", - "pk": 59, + "pk": 72, "fields": { "name": "Can delete prefix", - "content_type": 15, + "content_type": 13, "codename": "delete_prefix" } }, { "model": "auth.permission", - "pk": 60, + "pk": 73, "fields": { "name": "Can view prefix", - "content_type": 15, + "content_type": 13, "codename": "view_prefix" } }, @@ -575,94 +676,87 @@ }, { "model": "auth.user", - "pk": 2, + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "tester", + "first_name": "", + "last_name": "", + "email": "tester@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 53 + ] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab", + "first_name": "", + "last_name": "", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [], + "user_permissions": [] + } + }, + { + 
"model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe", + "first_name": "", + "last_name": "", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 7, "fields": { - "password": "pbkdf2_sha256$260000$dUkrKpJQ4E9Yxc7Jwv4vED$4un2CAwu8aGcLPo4Cpr7hfjz7ReHKSsDX1aF01AzDv0=", - "last_login": "2024-03-14T13:52:58.235Z", + "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", + "last_login": "2024-04-03T10:39:32Z", "is_superuser": true, - "username": "wheel", + "username": "bco_api_user", "first_name": "", "last_name": "", - "email": "wheel@wheel.wheel", + "email": "object.biocompute@gmail.com", "is_staff": true, "is_active": true, - "date_joined": "2024-03-14T13:52:45.992Z", + "date_joined": "2024-04-03T10:39:01Z", "groups": [], - "user_permissions": [] + "user_permissions": [ + 54, + 55, + 56, + 57, + 53 + ] } }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2023-01-14T12:21:51.437Z", - "is_superuser": true, - "username": "bco_api_user", - "first_name": "", - "last_name": "", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 4, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - "is_superuser": false, - "username": "tester", - "first_name": "", - "last_name": "", - "email": "tester@testing.com", - "is_staff": false, - "is_active": true, - 
"date_joined": "2022-05-10T20:50:39.093Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab", - "first_name": "", - "last_name": "", - "email": "hivelab@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe", - "first_name": "", - "last_name": "", - "email": "jdoe@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - "groups": [], - "user_permissions": [] - } - }, { "model": "contenttypes.contenttype", "pk": 1, @@ -736,43 +830,51 @@ } }, { - "model": "sessions.session", - "pk": "mpq9r3ogmf3pel91gaqfvhzaf0pmdl4f", + "model": "contenttypes.contenttype", + "pk": 10, "fields": { - "session_data": ".eJxVjMsOwiAQRf-FtSHlDS7d-w0EZgapGkhKuzL-uzbpQrf3nHNfLKZtrXEbtMQZ2ZlJdvrdcoIHtR3gPbVb59DbusyZ7wo_6ODXjvS8HO7fQU2jfutifZk0-qJdnkxI2StUWpB02kMCRcKGgii9KyRcQKDsjVDKFWOVk8DeH-rXN_A:1rklVy:mEkrOQPP77B9gY7nyQKTbKv0G5l_4e3OOxWx6FubKb4", - "expire_date": "2024-03-28T13:52:58.236Z" + "app_label": "authentication", + "model": "authentication" } }, { - "model": "authtoken.token", - "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", + "model": "contenttypes.contenttype", + "pk": 11, "fields": { - "user": 5, - "created": "2024-03-14T15:21:04.318Z" + "app_label": "authentication", + "model": "newuser" } }, { - "model": "authtoken.token", - "pk": "39182da8b9e634803d3dacb0b1858fb89f0db8ce", + "model": "contenttypes.contenttype", + "pk": 12, + "fields": { + "app_label": "biocompute", + "model": "bco" + 
} + }, + { + "model": "contenttypes.contenttype", + "pk": 13, "fields": { - "user": 3, - "created": "2024-03-14T15:20:51.567Z" + "app_label": "prefix", + "model": "prefix" } }, { "model": "authtoken.token", - "pk": "705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", + "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", "fields": { - "user": 4, - "created": "2024-03-14T15:21:14.996Z" + "user": 5, + "created": "2024-03-14T15:21:04.318Z" } }, { "model": "authtoken.token", - "pk": "82d6e38c7f389f7637944f9884c351a19b61e6e8", + "pk": "705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", "fields": { - "user": 2, - "created": "2024-03-14T13:53:53.090Z" + "user": 4, + "created": "2024-03-14T15:21:14.996Z" } }, { @@ -791,6 +893,14 @@ "created": "2024-03-14T15:21:09.348Z" } }, + { + "model": "authtoken.token", + "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", + "fields": { + "user": 7, + "created": "2024-04-03T10:53:08.951Z" + } + }, { "model": "authentication.authentication", "pk": 1, @@ -815,6 +925,2123 @@ "created": "2024-03-14T14:28:32Z" } }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + 
"usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:47:13Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George 
Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "DRAFT", + "last_update": "2024-04-03T10:45:47Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:47:56Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-03T10:35:25Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of 
organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": 
"shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": 
"2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-03T10:35:59Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + 
"filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "last_update": "2024-04-03T10:44:53Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": 
"The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + 
"software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + 
"category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:49:17Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. 
Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-03T10:46:35Z", + "access_count": 0, + "authorized_users": [] + } + }, { "model": "prefix.prefix", "pk": "BCO", @@ -823,8 +3050,20 @@ "created": "2024-03-14T13:53:59Z", "description": "Default prefix for all BioCompute Objects", "owner": "AnonymousUser", - "authorized_groups": [], - "counter": 0 + "counter": 0, + "public": true + } + }, + { + "model": "prefix.prefix", + "pk": 
"NOPUB", + "fields": { + "certifying_key": "", + "created": "2024-03-26T22:22:22Z", + "description": "Test non-public prefix.", + "owner": "bco_api_user", + "counter": 0, + "public": false } }, { @@ -835,8 +3074,8 @@ "created": "2024-03-14T13:53:59Z", "description": "Test prefix", "owner": "tester", - "authorized_groups": [], - "counter": 0 + "counter": 0, + "public": true } } ] \ No newline at end of file diff --git a/config/services.py b/config/services.py index 3e8b39ad..b3255953 100644 --- a/config/services.py +++ b/config/services.py @@ -34,8 +34,30 @@ def response_constructor( message: str=None, data: dict= None )-> dict: - """Response Data Proccessing + + """Constructs a structured response dictionary. + + This function creates a standardized response object for API responses. + It structures the response with a given identifier as the key and includes + details such as status, code, an optional message, and optional data. + + Parameters: + - identifier (str): + A unique identifier for the response object. + - status (str): + The request status (e.g., 'success', 'error')indicating the outcome + of the operation. + - code (str): + The HTTP status code representing the result of the operation. + - message (str, optional): + An optional message providing additional information about the + response or the result of the operation. Default is None. + - data (dict, optional): + An optional dictionary containing any data that should be returned in + the response. This can include the payload of a successful request or + details of an error. Default is None. 
""" + response_object = { identifier: { "request_status": status, diff --git a/config/urls.py b/config/urls.py index 8f854346..0b208ebc 100755 --- a/config/urls.py +++ b/config/urls.py @@ -10,6 +10,7 @@ from drf_yasg import openapi from rest_framework import permissions from rest_framework_jwt.views import obtain_jwt_token, verify_jwt_token +from biocompute.apis import DraftRetrieveApi # Load the server config file. server_config = configparser.ConfigParser() @@ -56,4 +57,5 @@ path("api/", include("search.urls")), path("api/", include("biocompute.urls")), path("api/", include("prefix.urls")), + path("/DRAFT", DraftRetrieveApi.as_view()), ] diff --git a/prefix/selectors.py b/prefix/selectors.py index cf3707b9..f94378f1 100644 --- a/prefix/selectors.py +++ b/prefix/selectors.py @@ -6,27 +6,67 @@ """ from django.core.serializers import serialize -from django.contrib.auth.models import Permission -from django.contrib.auth.models import User +from django.contrib.auth.models import User, Permission from django.db import utils from prefix.models import Prefix -def get_user_prefixes(user: User) -> dict: +def user_can_draft(user: User, prefix_name:str) -> bool: + """User Can Draft + + Takes a prefix name and user. Returns a bool if the user can draft a BCO + with the prefix if it exists. If the prefix does not exist `None` + is returned. + """ + + try: + Prefix.objects.get(prefix=prefix_name) + except Prefix.DoesNotExist: + return None + codename = f"add_{prefix_name}" + user_prefixes = get_user_prefixes(user) + + return codename in user_prefixes + +def user_can_view(prefix_name:str, user: User) -> bool: + """User Can View + + Takes a prefix name and user. Returns a bool if the user can view a BCO + with the prefix if it exists. If the prefix does not exist `None` + is returned. 
+ """ + + try: + prefix_instance = Prefix.objects.get(prefix=prefix_name) + if prefix_instance.public is True: + return True + except Prefix.DoesNotExist: + return None + codename = f"view_{prefix_name}" + user_prefixes = get_user_prefixes(user) + + return codename in user_prefixes + +def get_user_prefixes(user: User) -> list: """Get User Prefixes + Retrieves a User's Prefix Permissions + + Compiles a list of permissions associated with prefixes that a given user + has access to, including permissions for public prefixes. - Returns a dictionary with the users associated Prefix permisssions. + Note: + This function fetches permissions for public prefixes as well as those + directly assigned to the user via user permissions. """ - prefix_permissions = { - "public_permissions":[], - "not_public_permissions": [] - } + + prefix_permissions = [] public_prefixes = Prefix.objects.filter(public=True) for prefix_instance in public_prefixes: - prefix_permissions["public_permissions"].append(prefix_instance.pk) + for perm in [ "view", "add", "change", "delete", "publish"]: + codename = f"{perm}_{prefix_instance.prefix}" + prefix_permissions.append(codename) for permission in user.user_permissions.all(): - print(permission) - prefix_permissions["not_public_permissions"].append(permission.name) + prefix_permissions.append(permission.codename) return prefix_permissions @@ -61,19 +101,14 @@ def get_prefix_permissions(prefix_name:str) -> dict: codename = f"{perm}_{prefix_name}" try: perms.append(Permission.objects.get(codename__exact=codename)) - except utils.IntegrityError: - # The permissions doesn't exist. 
+ except Permission.DoesNotExist: pass - for perm in perms: users_with_perm = User.objects.filter(user_permissions=perm).prefetch_related('user_permissions') for user in users_with_perm: - # Initialize the user entry in the dictionary if not already present if user.username not in users_permissions: users_permissions[user.username] = [] - - # Add the permission codename to the user's permissions list, avoiding duplicates if perm.codename not in users_permissions[user.username]: users_permissions[user.username].append(perm.codename) diff --git a/prefix/services.py b/prefix/services.py index 8f2b4fd7..f5a806fc 100644 --- a/prefix/services.py +++ b/prefix/services.py @@ -21,14 +21,52 @@ """ class PrefixSerializer(serializers.Serializer): + """Serializer for Prefix instances. + + For validation and serialization of Prefix data. + + Fields: + - prefix (CharField): + A unique identifier for the Prefix, with a length constraint between 3 to 5 characters. It is automatically converted to upper case. + - description (CharField): + A textual description of the Prefix. + - user_permissions (JSONField): + A JSON structure detailing specific user permissions related to the Prefix. This field is optional. + - public (BooleanField): A flag indicating whether the Prefix is public or private. + This field is not required and defaults to `False` if not provided. + + Methods: + - validate(self, attrs): Validates the Prefix data. + - create(self, validated_data): Creates a new Prefix instance from the validated data. + - update(self, instance, validated_data): Updates an existing Prefix instance based + on the validated data. + + Note: The create and update operations are performed within a database transaction to + ensure data integrity. 
+ """ + prefix = serializers.CharField(min_length=3, max_length=5) description = serializers.CharField() - authorized_groups = serializers.ListField(child=serializers.CharField(allow_blank=True), required=False) user_permissions = serializers.JSONField(required=False, default={}) public = serializers.BooleanField(required=False) def validate(self, attrs): """Prefix Validator + + Validates incoming Prefix data against business rules and integrity constraints. + + It ensures the prefix is unique (for creation), exists (for updates), and assigns + the Prefix's owner based on the current request's user. It also converts the prefix + to upper case for consistency. + + Parameters: + - attrs (dict): The incoming Prefix data to validate. + + Returns: + - dict: The validated Prefix data, potentially modified (e.g., upper-cased prefix). + + Raises: + - serializers.ValidationError: If the prefix violates uniqueness or existence constraints. """ request = self.context.get('request') @@ -52,6 +90,18 @@ def validate(self, attrs): @transaction.atomic def create(self, validated_data): """Create function for Prefix + + Creates a Prefix instance from the validated data. + + It handles the 'public' attribute specifically to manage permissions associated + with the Prefix. The 'user_permissions' field is ignored as it does not correspond + to a model field. + + Parameters: + - validated_data (dict): The data that has passed validation checks. + + Returns: + - Prefix: The newly created Prefix instance. """ validated_data.pop('user_permissions') @@ -67,6 +117,20 @@ def create(self, validated_data): def update(self, validated_data): """Update function for Prefix + Updates an existing Prefix instance based on the validated data. + + It checks the ownership before applying changes, updates the Prefix's public status, + and manages user permissions accordingly. + + Parameters: + - instance (Prefix): The Prefix instance to update. 
+ - validated_data (dict): The data that has passed validation checks. + + Returns: + - Prefix: The updated Prefix instance. + + Raises: + - PermissionError: If the current user does not own the Prefix. """ prefix_instance = Prefix.objects.get(prefix=validated_data['prefix']) @@ -130,7 +194,6 @@ def update_user_permissions(prefix_name: str, user_permissions: dict): # Handle case where user doesn't exist if necessary pass - def create_permissions_for_prefix(instance=Prefix): """Prefix Permission Creation @@ -163,9 +226,12 @@ def prefix_counter_increment(prefix_instance: Prefix) -> int: Counter for BCO object_id asignment. """ - Prefix.objects.update(counter=F("counter") + 1) - count = prefix_instance.counter - return count + prefix_instance.counter = F('counter') + 1 + prefix_instance.save() + + prefix_instance.refresh_from_db() + + return prefix_instance.counter @transaction.atomic def delete_prefix(prefix_name: str, user: User) -> bool: diff --git a/test.json b/test.json deleted file mode 100644 index 70594fde..00000000 --- a/test.json +++ /dev/null @@ -1,862 +0,0 @@ -[ - { - "model": "auth.permission", - "pk": 1, - "fields": { - "name": "Can add log entry", - "content_type": 1, - "codename": "add_logentry" - } - }, - { - "model": "auth.permission", - "pk": 2, - "fields": { - "name": "Can change log entry", - "content_type": 1, - "codename": "change_logentry" - } - }, - { - "model": "auth.permission", - "pk": 3, - "fields": { - "name": "Can delete log entry", - "content_type": 1, - "codename": "delete_logentry" - } - }, - { - "model": "auth.permission", - "pk": 4, - "fields": { - "name": "Can view log entry", - "content_type": 1, - "codename": "view_logentry" - } - }, - { - "model": "auth.permission", - "pk": 5, - "fields": { - "name": "Can add permission", - "content_type": 2, - "codename": "add_permission" - } - }, - { - "model": "auth.permission", - "pk": 6, - "fields": { - "name": "Can change permission", - "content_type": 2, - "codename": "change_permission" 
- } - }, - { - "model": "auth.permission", - "pk": 7, - "fields": { - "name": "Can delete permission", - "content_type": 2, - "codename": "delete_permission" - } - }, - { - "model": "auth.permission", - "pk": 8, - "fields": { - "name": "Can view permission", - "content_type": 2, - "codename": "view_permission" - } - }, - { - "model": "auth.permission", - "pk": 9, - "fields": { - "name": "Can add group", - "content_type": 3, - "codename": "add_group" - } - }, - { - "model": "auth.permission", - "pk": 10, - "fields": { - "name": "Can change group", - "content_type": 3, - "codename": "change_group" - } - }, - { - "model": "auth.permission", - "pk": 11, - "fields": { - "name": "Can delete group", - "content_type": 3, - "codename": "delete_group" - } - }, - { - "model": "auth.permission", - "pk": 12, - "fields": { - "name": "Can view group", - "content_type": 3, - "codename": "view_group" - } - }, - { - "model": "auth.permission", - "pk": 13, - "fields": { - "name": "Can add user", - "content_type": 4, - "codename": "add_user" - } - }, - { - "model": "auth.permission", - "pk": 14, - "fields": { - "name": "Can change user", - "content_type": 4, - "codename": "change_user" - } - }, - { - "model": "auth.permission", - "pk": 15, - "fields": { - "name": "Can delete user", - "content_type": 4, - "codename": "delete_user" - } - }, - { - "model": "auth.permission", - "pk": 16, - "fields": { - "name": "Can view user", - "content_type": 4, - "codename": "view_user" - } - }, - { - "model": "auth.permission", - "pk": 17, - "fields": { - "name": "Can add content type", - "content_type": 5, - "codename": "add_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 18, - "fields": { - "name": "Can change content type", - "content_type": 5, - "codename": "change_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 19, - "fields": { - "name": "Can delete content type", - "content_type": 5, - "codename": "delete_contenttype" - } - }, - { - "model": "auth.permission", - 
"pk": 20, - "fields": { - "name": "Can view content type", - "content_type": 5, - "codename": "view_contenttype" - } - }, - { - "model": "auth.permission", - "pk": 21, - "fields": { - "name": "Can add session", - "content_type": 6, - "codename": "add_session" - } - }, - { - "model": "auth.permission", - "pk": 22, - "fields": { - "name": "Can change session", - "content_type": 6, - "codename": "change_session" - } - }, - { - "model": "auth.permission", - "pk": 23, - "fields": { - "name": "Can delete session", - "content_type": 6, - "codename": "delete_session" - } - }, - { - "model": "auth.permission", - "pk": 24, - "fields": { - "name": "Can view session", - "content_type": 6, - "codename": "view_session" - } - }, - { - "model": "auth.permission", - "pk": 25, - "fields": { - "name": "Can add Token", - "content_type": 7, - "codename": "add_token" - } - }, - { - "model": "auth.permission", - "pk": 26, - "fields": { - "name": "Can change Token", - "content_type": 7, - "codename": "change_token" - } - }, - { - "model": "auth.permission", - "pk": 27, - "fields": { - "name": "Can delete Token", - "content_type": 7, - "codename": "delete_token" - } - }, - { - "model": "auth.permission", - "pk": 28, - "fields": { - "name": "Can view Token", - "content_type": 7, - "codename": "view_token" - } - }, - { - "model": "auth.permission", - "pk": 29, - "fields": { - "name": "Can add token", - "content_type": 8, - "codename": "add_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 30, - "fields": { - "name": "Can change token", - "content_type": 8, - "codename": "change_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 31, - "fields": { - "name": "Can delete token", - "content_type": 8, - "codename": "delete_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 32, - "fields": { - "name": "Can view token", - "content_type": 8, - "codename": "view_tokenproxy" - } - }, - { - "model": "auth.permission", - "pk": 33, - "fields": { - "name": "Can add 
blacklisted token", - "content_type": 9, - "codename": "add_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 34, - "fields": { - "name": "Can change blacklisted token", - "content_type": 9, - "codename": "change_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 35, - "fields": { - "name": "Can delete blacklisted token", - "content_type": 9, - "codename": "delete_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 36, - "fields": { - "name": "Can view blacklisted token", - "content_type": 9, - "codename": "view_blacklistedtoken" - } - }, - { - "model": "auth.permission", - "pk": 37, - "fields": { - "name": "Can add group object permission", - "content_type": 10, - "codename": "add_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 38, - "fields": { - "name": "Can change group object permission", - "content_type": 10, - "codename": "change_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 39, - "fields": { - "name": "Can delete group object permission", - "content_type": 10, - "codename": "delete_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 40, - "fields": { - "name": "Can view group object permission", - "content_type": 10, - "codename": "view_groupobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 41, - "fields": { - "name": "Can add user object permission", - "content_type": 11, - "codename": "add_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 42, - "fields": { - "name": "Can change user object permission", - "content_type": 11, - "codename": "change_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 43, - "fields": { - "name": "Can delete user object permission", - "content_type": 11, - "codename": "delete_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 44, - "fields": { - "name": "Can view user object permission", - "content_type": 11, - 
"codename": "view_userobjectpermission" - } - }, - { - "model": "auth.permission", - "pk": 45, - "fields": { - "name": "Can add new user", - "content_type": 12, - "codename": "add_newuser" - } - }, - { - "model": "auth.permission", - "pk": 46, - "fields": { - "name": "Can change new user", - "content_type": 12, - "codename": "change_newuser" - } - }, - { - "model": "auth.permission", - "pk": 47, - "fields": { - "name": "Can delete new user", - "content_type": 12, - "codename": "delete_newuser" - } - }, - { - "model": "auth.permission", - "pk": 48, - "fields": { - "name": "Can view new user", - "content_type": 12, - "codename": "view_newuser" - } - }, - { - "model": "auth.permission", - "pk": 49, - "fields": { - "name": "Can add authentication", - "content_type": 13, - "codename": "add_authentication" - } - }, - { - "model": "auth.permission", - "pk": 50, - "fields": { - "name": "Can change authentication", - "content_type": 13, - "codename": "change_authentication" - } - }, - { - "model": "auth.permission", - "pk": 51, - "fields": { - "name": "Can delete authentication", - "content_type": 13, - "codename": "delete_authentication" - } - }, - { - "model": "auth.permission", - "pk": 52, - "fields": { - "name": "Can view authentication", - "content_type": 13, - "codename": "view_authentication" - } - }, - { - "model": "auth.permission", - "pk": 53, - "fields": { - "name": "Can add bco", - "content_type": 14, - "codename": "add_bco" - } - }, - { - "model": "auth.permission", - "pk": 54, - "fields": { - "name": "Can change bco", - "content_type": 14, - "codename": "change_bco" - } - }, - { - "model": "auth.permission", - "pk": 55, - "fields": { - "name": "Can delete bco", - "content_type": 14, - "codename": "delete_bco" - } - }, - { - "model": "auth.permission", - "pk": 56, - "fields": { - "name": "Can view bco", - "content_type": 14, - "codename": "view_bco" - } - }, - { - "model": "auth.permission", - "pk": 57, - "fields": { - "name": "Can add prefix", - 
"content_type": 15, - "codename": "add_prefix" - } - }, - { - "model": "auth.permission", - "pk": 58, - "fields": { - "name": "Can change prefix", - "content_type": 15, - "codename": "change_prefix" - } - }, - { - "model": "auth.permission", - "pk": 59, - "fields": { - "name": "Can delete prefix", - "content_type": 15, - "codename": "delete_prefix" - } - }, - { - "model": "auth.permission", - "pk": 60, - "fields": { - "name": "Can view prefix", - "content_type": 15, - "codename": "view_prefix" - } - }, - { - "model": "auth.user", - "pk": 1, - "fields": { - "password": "!Bh8Fg1xZLdW7N3SEpDh5IO2PzJZtsMDEqwHeJn5w", - "last_login": null, - "is_superuser": false, - "username": "AnonymousUser", - "first_name": "", - "last_name": "", - "email": "", - "is_staff": false, - "is_active": true, - "date_joined": "2024-03-14T13:52:22.277Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 2, - "fields": { - "password": "pbkdf2_sha256$260000$dUkrKpJQ4E9Yxc7Jwv4vED$4un2CAwu8aGcLPo4Cpr7hfjz7ReHKSsDX1aF01AzDv0=", - "last_login": "2024-03-14T13:52:58.235Z", - "is_superuser": true, - "username": "wheel", - "first_name": "", - "last_name": "", - "email": "wheel@wheel.wheel", - "is_staff": true, - "is_active": true, - "date_joined": "2024-03-14T13:52:45.992Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2023-01-14T12:21:51.437Z", - "is_superuser": true, - "username": "bco_api_user", - "first_name": "", - "last_name": "", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 4, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - 
"is_superuser": false, - "username": "tester", - "first_name": "", - "last_name": "", - "email": "tester@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:50:39.093Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab", - "first_name": "", - "last_name": "", - "email": "hivelab@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe", - "first_name": "", - "last_name": "", - "email": "jdoe@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "contenttypes.contenttype", - "pk": 1, - "fields": { - "app_label": "admin", - "model": "logentry" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 2, - "fields": { - "app_label": "auth", - "model": "permission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 3, - "fields": { - "app_label": "auth", - "model": "group" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 4, - "fields": { - "app_label": "auth", - "model": "user" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 5, - "fields": { - "app_label": "contenttypes", - "model": "contenttype" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 6, - "fields": { - "app_label": "sessions", - "model": "session" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 7, - "fields": { - "app_label": "authtoken", - "model": "token" - } - }, - 
{ - "model": "contenttypes.contenttype", - "pk": 8, - "fields": { - "app_label": "authtoken", - "model": "tokenproxy" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 9, - "fields": { - "app_label": "blacklist", - "model": "blacklistedtoken" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 10, - "fields": { - "app_label": "guardian", - "model": "groupobjectpermission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 11, - "fields": { - "app_label": "guardian", - "model": "userobjectpermission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 12, - "fields": { - "app_label": "authentication", - "model": "newuser" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 13, - "fields": { - "app_label": "authentication", - "model": "authentication" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 14, - "fields": { - "app_label": "biocompute", - "model": "bco" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 15, - "fields": { - "app_label": "prefix", - "model": "prefix" - } - }, - { - "model": "sessions.session", - "pk": "mpq9r3ogmf3pel91gaqfvhzaf0pmdl4f", - "fields": { - "session_data": ".eJxVjMsOwiAQRf-FtSHlDS7d-w0EZgapGkhKuzL-uzbpQrf3nHNfLKZtrXEbtMQZ2ZlJdvrdcoIHtR3gPbVb59DbusyZ7wo_6ODXjvS8HO7fQU2jfutifZk0-qJdnkxI2StUWpB02kMCRcKGgii9KyRcQKDsjVDKFWOVk8DeH-rXN_A:1rklVy:mEkrOQPP77B9gY7nyQKTbKv0G5l_4e3OOxWx6FubKb4", - "expire_date": "2024-03-28T13:52:58.236Z" - } - }, - { - "model": "authtoken.token", - "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", - "fields": { - "user": 5, - "created": "2024-03-14T15:21:04.318Z" - } - }, - { - "model": "authtoken.token", - "pk": "39182da8b9e634803d3dacb0b1858fb89f0db8ce", - "fields": { - "user": 3, - "created": "2024-03-14T15:20:51.567Z" - } - }, - { - "model": "authtoken.token", - "pk": "705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", - "fields": { - "user": 4, - "created": "2024-03-14T15:21:14.996Z" - } - }, - { - "model": "authtoken.token", - "pk": 
"82d6e38c7f389f7637944f9884c351a19b61e6e8", - "fields": { - "user": 2, - "created": "2024-03-14T13:53:53.090Z" - } - }, - { - "model": "authtoken.token", - "pk": "b8e588c4bdfb366420007827054042e8e594ec51", - "fields": { - "user": 1, - "created": "2024-03-14T13:53:45.793Z" - } - }, - { - "model": "authtoken.token", - "pk": "ba1a932a6af59930293e087c1633fa60927b6690", - "fields": { - "user": 6, - "created": "2024-03-14T15:21:09.348Z" - } - }, - { - "model": "authentication.authentication", - "pk": 1, - "fields": { - "username": "bco_api_user", - "auth_service": [ - { - "iss": "Reeya1", - "sub": "ReeyaGupta1" - } - ] - } - }, - { - "model": "authentication.newuser", - "pk": 1, - "fields": { - "email": "test_new_user@testing.com", - "temp_identifier": "sample_temp_identifier", - "token": "token", - "hostname": "http://localhost:8000/", - "created": "2024-03-14T14:28:32Z" - } - }, - { - "model": "prefix.prefix", - "pk": "BCO", - "fields": { - "certifying_key": "1", - "created": "2024-03-14T13:53:59Z", - "description": "Default prefix for all BioCompute Objects", - "owner": "AnonymousUser", - "authorized_groups": [], - "counter": 0 - } - } -] \ No newline at end of file diff --git a/tests/fixtures/example_bco.py b/tests/fixtures/example_bco.py index 98ea4788..44bb0542 100644 --- a/tests/fixtures/example_bco.py +++ b/tests/fixtures/example_bco.py @@ -1,6 +1,9 @@ +from django.conf import settings + +hostname = settings.PUBLIC_HOSTNAME BCO_000000 = { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000000/DRAFT", + "object_id": f"{hostname}/BCO_000000/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", "provenance_domain": { @@ -198,7 +201,7 @@ } BCO_000001 = { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", + "object_id": f"{hostname}/BCO_000001/DRAFT", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", 
"etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", "provenance_domain": { diff --git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index d18930d5..2bc23342 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -1,28 +1,4 @@ [ - { - "model": "auth.group", - "pk": 1, - "fields": { - "name": "bco_publisher", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 2, - "fields": { - "name": "bco_drafter", - "permissions": [] - } - }, - { - "model": "auth.group", - "pk": 3, - "fields": { - "name": "test_drafter", - "permissions": [] - } - }, { "model": "auth.permission", "pk": 1, @@ -494,72 +470,189 @@ { "model": "auth.permission", "pk": 53, + "fields": { + "name": "Can view BCOs with prefix NOPUB", + "content_type": 13, + "codename": "view_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 54, + "fields": { + "name": "Can add BCOs with prefix NOPUB", + "content_type": 13, + "codename": "add_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 55, + "fields": { + "name": "Can change BCOs with prefix NOPUB", + "content_type": 13, + "codename": "change_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 56, + "fields": { + "name": "Can delete BCOs with prefix NOPUB", + "content_type": 13, + "codename": "delete_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 57, + "fields": { + "name": "Can publish BCOs with prefix NOPUB", + "content_type": 13, + "codename": "publish_NOPUB" + } + }, + { + "model": "auth.permission", + "pk": 58, + "fields": { + "name": "Can add new user", + "content_type": 11, + "codename": "add_newuser" + } + }, + { + "model": "auth.permission", + "pk": 59, + "fields": { + "name": "Can change new user", + "content_type": 11, + "codename": "change_newuser" + } + }, + { + "model": "auth.permission", + "pk": 60, + "fields": { + "name": "Can delete new user", + "content_type": 11, + "codename": "delete_newuser" + } + }, + { + "model": "auth.permission", 
+ "pk": 61, + "fields": { + "name": "Can view new user", + "content_type": 11, + "codename": "view_newuser" + } + }, + { + "model": "auth.permission", + "pk": 62, + "fields": { + "name": "Can add authentication", + "content_type": 10, + "codename": "add_authentication" + } + }, + { + "model": "auth.permission", + "pk": 63, + "fields": { + "name": "Can change authentication", + "content_type": 10, + "codename": "change_authentication" + } + }, + { + "model": "auth.permission", + "pk": 64, + "fields": { + "name": "Can delete authentication", + "content_type": 10, + "codename": "delete_authentication" + } + }, + { + "model": "auth.permission", + "pk": 65, + "fields": { + "name": "Can view authentication", + "content_type": 10, + "codename": "view_authentication" + } + }, + { + "model": "auth.permission", + "pk": 66, "fields": { "name": "Can add bco", - "content_type": 14, + "content_type": 12, "codename": "add_bco" } }, { "model": "auth.permission", - "pk": 54, + "pk": 67, "fields": { "name": "Can change bco", - "content_type": 14, + "content_type": 12, "codename": "change_bco" } }, { "model": "auth.permission", - "pk": 55, + "pk": 68, "fields": { "name": "Can delete bco", - "content_type": 14, + "content_type": 12, "codename": "delete_bco" } }, { "model": "auth.permission", - "pk": 56, + "pk": 69, "fields": { "name": "Can view bco", - "content_type": 14, + "content_type": 12, "codename": "view_bco" } }, { "model": "auth.permission", - "pk": 57, + "pk": 70, "fields": { "name": "Can add prefix", - "content_type": 15, + "content_type": 13, "codename": "add_prefix" } }, { "model": "auth.permission", - "pk": 58, + "pk": 71, "fields": { "name": "Can change prefix", - "content_type": 15, + "content_type": 13, "codename": "change_prefix" } }, { "model": "auth.permission", - "pk": 59, + "pk": 72, "fields": { "name": "Can delete prefix", - "content_type": 15, + "content_type": 13, "codename": "delete_prefix" } }, { "model": "auth.permission", - "pk": 60, + "pk": 73, "fields": 
{ "name": "Can view prefix", - "content_type": 15, + "content_type": 13, "codename": "view_prefix" } }, @@ -583,94 +676,87 @@ }, { "model": "auth.user", - "pk": 2, + "pk": 4, + "fields": { + "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", + "last_login": null, + "is_superuser": false, + "username": "tester", + "first_name": "", + "last_name": "", + "email": "tester@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:50:39Z", + "groups": [], + "user_permissions": [ + 54, + 55, + 56, + 53 + ] + } + }, + { + "model": "auth.user", + "pk": 5, + "fields": { + "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", + "last_login": null, + "is_superuser": false, + "username": "hivelab", + "first_name": "", + "last_name": "", + "email": "hivelab@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:53:42.499Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 6, + "fields": { + "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", + "last_login": null, + "is_superuser": false, + "username": "jdoe", + "first_name": "", + "last_name": "", + "email": "jdoe@testing.com", + "is_staff": false, + "is_active": true, + "date_joined": "2022-05-10T20:54:44.793Z", + "groups": [], + "user_permissions": [] + } + }, + { + "model": "auth.user", + "pk": 7, "fields": { - "password": "pbkdf2_sha256$260000$dUkrKpJQ4E9Yxc7Jwv4vED$4un2CAwu8aGcLPo4Cpr7hfjz7ReHKSsDX1aF01AzDv0=", - "last_login": "2024-03-14T13:52:58.235Z", + "password": "pbkdf2_sha256$260000$srfwJ6ZrNVTgwiJkjQcKe5$c5V7Bp58Ad7+SwZdUlFiHAI66ArV1fREWg/h/6flpa8=", + "last_login": "2024-04-03T10:39:32Z", "is_superuser": true, - "username": "wheel", + "username": "bco_api_user", "first_name": "", "last_name": "", - "email": "wheel@wheel.wheel", + "email": 
"object.biocompute@gmail.com", "is_staff": true, "is_active": true, - "date_joined": "2024-03-14T13:52:45.992Z", + "date_joined": "2024-04-03T10:39:01Z", "groups": [], - "user_permissions": [] + "user_permissions": [ + 54, + 55, + 56, + 57, + 53 + ] } }, - { - "model": "auth.user", - "pk": 3, - "fields": { - "password": "pbkdf2_sha256$260000$9bpdEuUNU9qApubRxNJM8d$0fA4uPEKG0TQBuHp/Cn04q9JtzC9rABjajxZb6NFEgg=", - "last_login": "2023-01-14T12:21:51.437Z", - "is_superuser": true, - "username": "bco_api_user", - "first_name": "", - "last_name": "", - "email": "object.biocompute@gmail.com", - "is_staff": true, - "is_active": true, - "date_joined": "2022-05-10T20:35:53Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 4, - "fields": { - "password": "pbkdf2_sha256$260000$ncP8Sob0Rke6WIsf6lV0Ep$5/tabe+16bQcMdn3nmpX4Zb101XPc2dwTNxf9euS9lg=", - "last_login": null, - "is_superuser": false, - "username": "tester", - "first_name": "", - "last_name": "", - "email": "tester@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:50:39.093Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 5, - "fields": { - "password": "pbkdf2_sha256$260000$nj2qTc19zYzyQ0NZIRQvw5$wd8ZURl0hx0uCMYhK8nD7kqGIScs0wqHIcxd6+1Wryw=", - "last_login": null, - "is_superuser": false, - "username": "hivelab", - "first_name": "", - "last_name": "", - "email": "hivelab@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:53:42.499Z", - "groups": [], - "user_permissions": [] - } - }, - { - "model": "auth.user", - "pk": 6, - "fields": { - "password": "pbkdf2_sha256$260000$BK01G28EhBSlvLORWDFlYc$pFSvEXoeDN6QlTnrNVkfBDI+onkb/biwHquIevBY5Pw=", - "last_login": null, - "is_superuser": false, - "username": "jdoe", - "first_name": "", - "last_name": "", - "email": "jdoe@testing.com", - "is_staff": false, - "is_active": true, - "date_joined": "2022-05-10T20:54:44.793Z", - 
"groups": [], - "user_permissions": [] - } - }, { "model": "contenttypes.contenttype", "pk": 1, @@ -747,21 +833,13 @@ "model": "contenttypes.contenttype", "pk": 10, "fields": { - "app_label": "guardian", - "model": "groupobjectpermission" + "app_label": "authentication", + "model": "authentication" } }, { "model": "contenttypes.contenttype", "pk": 11, - "fields": { - "app_label": "guardian", - "model": "userobjectpermission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 12, "fields": { "app_label": "authentication", "model": "newuser" @@ -769,15 +847,7 @@ }, { "model": "contenttypes.contenttype", - "pk": 13, - "fields": { - "app_label": "authentication", - "model": "authentication" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 14, + "pk": 12, "fields": { "app_label": "biocompute", "model": "bco" @@ -785,20 +855,12 @@ }, { "model": "contenttypes.contenttype", - "pk": 15, + "pk": 13, "fields": { "app_label": "prefix", "model": "prefix" } }, - { - "model": "sessions.session", - "pk": "mpq9r3ogmf3pel91gaqfvhzaf0pmdl4f", - "fields": { - "session_data": ".eJxVjMsOwiAQRf-FtSHlDS7d-w0EZgapGkhKuzL-uzbpQrf3nHNfLKZtrXEbtMQZ2ZlJdvrdcoIHtR3gPbVb59DbusyZ7wo_6ODXjvS8HO7fQU2jfutifZk0-qJdnkxI2StUWpB02kMCRcKGgii9KyRcQKDsjVDKFWOVk8DeH-rXN_A:1rklVy:mEkrOQPP77B9gY7nyQKTbKv0G5l_4e3OOxWx6FubKb4", - "expire_date": "2024-03-28T13:52:58.236Z" - } - }, { "model": "authtoken.token", "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", @@ -807,14 +869,6 @@ "created": "2024-03-14T15:21:04.318Z" } }, - { - "model": "authtoken.token", - "pk": "39182da8b9e634803d3dacb0b1858fb89f0db8ce", - "fields": { - "user": 3, - "created": "2024-03-14T15:20:51.567Z" - } - }, { "model": "authtoken.token", "pk": "705531f3b2fbf80bb5a5b9d0cf4ee663676b4579", @@ -823,14 +877,6 @@ "created": "2024-03-14T15:21:14.996Z" } }, - { - "model": "authtoken.token", - "pk": "82d6e38c7f389f7637944f9884c351a19b61e6e8", - "fields": { - "user": 2, - "created": "2024-03-14T13:53:53.090Z" - } - }, { "model": 
"authtoken.token", "pk": "b8e588c4bdfb366420007827054042e8e594ec51", @@ -847,6 +893,14 @@ "created": "2024-03-14T15:21:09.348Z" } }, + { + "model": "authtoken.token", + "pk": "49020e6fb85eb19a15bbdfb5cf6a1a28aaa8c1ce", + "fields": { + "user": 7, + "created": "2024-04-03T10:53:08.951Z" + } + }, { "model": "authentication.authentication", "pk": 1, @@ -871,6 +925,2123 @@ "created": "2024-03-14T14:28:32Z" } }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. 
" + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", 
+ "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:47:13Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "bco_api_user", + "state": "DRAFT", + "last_update": "2024-04-03T10:45:47Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:47:56Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-03T10:35:25Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of 
organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": 
"shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": 
"2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "prefix": "BCO", + "owner": "hivelab", + "state": "DRAFT", + "last_update": "2024-04-03T10:35:59Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." + ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + 
"filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "prefix": "BCO", + "owner": "jdoe", + "state": "DRAFT", + "last_update": "2024-04-03T10:44:53Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": 
"The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. 
description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + 
"software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + 
"category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "PUBLISHED", + "last_update": "2024-04-03T10:49:17Z", + "access_count": 0, + "authorized_users": [] + } + }, + { + "model": "biocompute.bco", + "pk": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/NOPUB_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. 
Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "prefix": "NOPUB", + "owner": "tester", + "state": "DRAFT", + "last_update": "2024-04-03T10:46:35Z", + "access_count": 0, + "authorized_users": [] + } + }, { "model": "prefix.prefix", "pk": "BCO", @@ -879,8 +3050,20 @@ "created": "2024-03-14T13:53:59Z", "description": "Default prefix for all BioCompute Objects", "owner": "AnonymousUser", - "authorized_groups": [], - "counter": 0 + "counter": 0, + "public": true + } + }, + { + "model": "prefix.prefix", + "pk": 
"NOPUB", + "fields": { + "certifying_key": "", + "created": "2024-03-26T22:22:22Z", + "description": "Test non-public prefix.", + "owner": "bco_api_user", + "counter": 0, + "public": false } }, { @@ -891,8 +3074,8 @@ "created": "2024-03-14T13:53:59Z", "description": "Test prefix", "owner": "tester", - "authorized_groups": [], - "counter": 0 + "counter": 0, + "public": true } } ] \ No newline at end of file diff --git a/token.json b/token.json deleted file mode 100644 index 4d9d1f36..00000000 --- a/token.json +++ /dev/null @@ -1 +0,0 @@ -[{"model": "authentication.authentication", "pk": 1, "fields": {"username": "bco_api_user", "auth_service": [{"iss": "Reeya1", "sub": "ReeyaGupta1"}]}}, {"model": "authentication.newuser", "pk": 1, "fields": {"email": "test_new_user@testing.com", "temp_identifier": "sample_temp_identifier", "token": "token", "hostname": "http://localhost:8000/", "created": "2024-03-14T14:28:32Z"}}] \ No newline at end of file From cb0aa9ac6fd4c3ce95b136407075ee54b0d90586 Mon Sep 17 00:00:00 2001 From: hadleyking Date: Wed, 3 Apr 2024 09:37:48 -0400 Subject: [PATCH 2/3] Update testing functions Created subdirectory and moved test functions Changes to be committed: renamed: tests/test_views/__init__.py -> tests/test_apis/__init__.py new file: tests/test_apis/test_api_authentication/__init__.py renamed: tests/test_views/test_account_activate.py -> tests/test_apis/test_api_authentication/test_account_activate.py renamed: tests/test_views/test_account_describe.py -> tests/test_apis/test_api_authentication/test_account_describe.py renamed: tests/test_views/test_account_new.py -> tests/test_apis/test_api_authentication/test_account_new.py renamed: tests/test_views/test_api_auth_add.py -> tests/test_apis/test_api_authentication/test_api_auth_add.py renamed: tests/test_views/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/test_api_auth_remove.py renamed: tests/test_views/test_api_auth_reset_token.py -> 
tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_views/test_api_objects_drafts_create.py -> tests/test_apis/test_api_objects_drafts_create.py renamed: tests/test_views/test_auth_add.py -> tests/test_apis/test_auth_add.py renamed: tests/test_views/test_auth_remove.py -> tests/test_apis/test_auth_remove.py renamed: tests/test_views/test_auth_reset_token.py -> tests/test_apis/test_auth_reset_token.py renamed: tests/test_views/test_objects_drafts_create.py -> tests/test_apis/test_objects_drafts_create.py renamed: tests/test_views/test_prefixes_create.py -> tests/test_apis/test_prefixes_create.py new file: tests/test_apis/test_prefixes_modify.py deleted: tests/test_views/test_api_account_activate.py deleted: tests/test_views/test_api_account_describe.py deleted: tests/test_views/test_api_account_new.py deleted: tests/test_views/test_prefixes_modify.py --- tests/{test_views => test_apis}/__init__.py | 0 .../test_api_authentication/__init__.py | 0 .../test_account_activate.py | 0 .../test_account_describe.py | 0 .../test_account_new.py | 0 .../test_api_auth_add.py | 0 .../test_api_auth_remove.py | 0 .../test_api_auth_reset_token.py | 0 .../test_api_objects_drafts_create.py | 2 - .../test_auth_add.py | 0 .../test_auth_remove.py | 0 .../test_auth_reset_token.py | 0 .../test_objects_drafts_create.py | 2 - .../test_prefixes_create.py | 6 +- tests/test_apis/test_prefixes_modify.py | 149 +++++++++++++++++ tests/test_views/test_api_account_activate.py | 50 ------ tests/test_views/test_api_account_describe.py | 40 ----- tests/test_views/test_api_account_new.py | 57 ------- tests/test_views/test_prefixes_modify.py | 151 ------------------ 19 files changed, 151 insertions(+), 306 deletions(-) rename tests/{test_views => test_apis}/__init__.py (100%) create mode 100644 tests/test_apis/test_api_authentication/__init__.py rename tests/{test_views => test_apis/test_api_authentication}/test_account_activate.py (100%) rename tests/{test_views => 
test_apis/test_api_authentication}/test_account_describe.py (100%) rename tests/{test_views => test_apis/test_api_authentication}/test_account_new.py (100%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_add.py (100%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_remove.py (100%) rename tests/{test_views => test_apis/test_api_authentication}/test_api_auth_reset_token.py (100%) rename tests/{test_views => test_apis}/test_api_objects_drafts_create.py (98%) rename tests/{test_views => test_apis}/test_auth_add.py (100%) rename tests/{test_views => test_apis}/test_auth_remove.py (100%) rename tests/{test_views => test_apis}/test_auth_reset_token.py (100%) rename tests/{test_views => test_apis}/test_objects_drafts_create.py (98%) rename tests/{test_views => test_apis}/test_prefixes_create.py (95%) create mode 100644 tests/test_apis/test_prefixes_modify.py delete mode 100644 tests/test_views/test_api_account_activate.py delete mode 100644 tests/test_views/test_api_account_describe.py delete mode 100644 tests/test_views/test_api_account_new.py delete mode 100644 tests/test_views/test_prefixes_modify.py diff --git a/tests/test_views/__init__.py b/tests/test_apis/__init__.py similarity index 100% rename from tests/test_views/__init__.py rename to tests/test_apis/__init__.py diff --git a/tests/test_apis/test_api_authentication/__init__.py b/tests/test_apis/test_api_authentication/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_views/test_account_activate.py b/tests/test_apis/test_api_authentication/test_account_activate.py similarity index 100% rename from tests/test_views/test_account_activate.py rename to tests/test_apis/test_api_authentication/test_account_activate.py diff --git a/tests/test_views/test_account_describe.py b/tests/test_apis/test_api_authentication/test_account_describe.py similarity index 100% rename from tests/test_views/test_account_describe.py rename to 
tests/test_apis/test_api_authentication/test_account_describe.py diff --git a/tests/test_views/test_account_new.py b/tests/test_apis/test_api_authentication/test_account_new.py similarity index 100% rename from tests/test_views/test_account_new.py rename to tests/test_apis/test_api_authentication/test_account_new.py diff --git a/tests/test_views/test_api_auth_add.py b/tests/test_apis/test_api_authentication/test_api_auth_add.py similarity index 100% rename from tests/test_views/test_api_auth_add.py rename to tests/test_apis/test_api_authentication/test_api_auth_add.py diff --git a/tests/test_views/test_api_auth_remove.py b/tests/test_apis/test_api_authentication/test_api_auth_remove.py similarity index 100% rename from tests/test_views/test_api_auth_remove.py rename to tests/test_apis/test_api_authentication/test_api_auth_remove.py diff --git a/tests/test_views/test_api_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py similarity index 100% rename from tests/test_views/test_api_auth_reset_token.py rename to tests/test_apis/test_api_authentication/test_api_auth_reset_token.py diff --git a/tests/test_views/test_api_objects_drafts_create.py b/tests/test_apis/test_api_objects_drafts_create.py similarity index 98% rename from tests/test_views/test_api_objects_drafts_create.py rename to tests/test_apis/test_api_objects_drafts_create.py index 949674a9..b294e337 100644 --- a/tests/test_views/test_api_objects_drafts_create.py +++ b/tests/test_apis/test_api_objects_drafts_create.py @@ -50,7 +50,6 @@ def setUp(self): { "object_id": "http://127.0.0.1:8000/TEST_000001", "prefix": "TEST", - # "authorized_groups": ["testing"], "contents": { "object_id": "https://biocomputeobject.org/TEST_000001", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", @@ -105,7 +104,6 @@ def test_bad_request(self): { "object_id": "http://127.0.0.1:8000/TEST_000001", "prefix": "TEST", - "authorized_groups": ["testing"], "contents": { 
"object_id": "https://biocomputeobject.org/TEST_000001", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", diff --git a/tests/test_views/test_auth_add.py b/tests/test_apis/test_auth_add.py similarity index 100% rename from tests/test_views/test_auth_add.py rename to tests/test_apis/test_auth_add.py diff --git a/tests/test_views/test_auth_remove.py b/tests/test_apis/test_auth_remove.py similarity index 100% rename from tests/test_views/test_auth_remove.py rename to tests/test_apis/test_auth_remove.py diff --git a/tests/test_views/test_auth_reset_token.py b/tests/test_apis/test_auth_reset_token.py similarity index 100% rename from tests/test_views/test_auth_reset_token.py rename to tests/test_apis/test_auth_reset_token.py diff --git a/tests/test_views/test_objects_drafts_create.py b/tests/test_apis/test_objects_drafts_create.py similarity index 98% rename from tests/test_views/test_objects_drafts_create.py rename to tests/test_apis/test_objects_drafts_create.py index 14606378..68b15fc1 100644 --- a/tests/test_views/test_objects_drafts_create.py +++ b/tests/test_apis/test_objects_drafts_create.py @@ -50,7 +50,6 @@ def setUp(self): { "object_id": "http://127.0.0.1:8000/TEST_000001", "prefix": "TEST", - # "authorized_groups": ["testing"], "contents": { "object_id": "https://biocomputeobject.org/TEST_000001", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", @@ -105,7 +104,6 @@ def test_bad_request(self): { "object_id": "http://127.0.0.1:8000/TEST_000001", "prefix": "TEST", - "authorized_groups": ["testing"], "contents": { "object_id": "https://biocomputeobject.org/TEST_000001", "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", diff --git a/tests/test_views/test_prefixes_create.py b/tests/test_apis/test_prefixes_create.py similarity index 95% rename from tests/test_views/test_prefixes_create.py rename to tests/test_apis/test_prefixes_create.py index 91a70a34..78ee6013 100644 --- 
a/tests/test_views/test_prefixes_create.py +++ b/tests/test_apis/test_prefixes_create.py @@ -31,12 +31,12 @@ def setUp(self): self.data = [{ "prefix": "test1", "description": "Test prefix description.", - "authorized_groups": ["bco_publisher", "bco_drafter"] + "public": "true" }, { "prefix": "test2", "description": "Test prefix description.", - "authorized_groups": [""] + "public": "true" }] self.legacy_data = { @@ -125,8 +125,6 @@ def test_create_multi_status(self): # 400: Bad Request. The prefix * does not follow the naming rules for a prefix. self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) - # 404: Not Found. The user * was not found on the server. - self.assertIn('authorized_groups', response.data[1]['TESTR']['data']) # 409: Conflict. The prefix the requestor is attempting to create already exists. self.assertIn('prefix_name', response.data[3]['TEST']['data']) diff --git a/tests/test_apis/test_prefixes_modify.py b/tests/test_apis/test_prefixes_modify.py new file mode 100644 index 00000000..26c014b9 --- /dev/null +++ b/tests/test_apis/test_prefixes_modify.py @@ -0,0 +1,149 @@ +#!/usr/bin/env python3 + +"""Bulk Create Prefixes +Tests for 'All prefixes were successfully created. 200', 'Some prefix +modifications failed. 207', '400: All modifications failed', and 'Unauthorized. Authentication credentials were +not provided. 401' + +For the 207 response Each object submitted will have it's own response object +with it's own status code and message. These are as follows: + 201: The prefix * was successfully created. + 400: Bad Request. The expiration date * is not valid. + 400: Bad Request. The prefix * does not follow the naming rules for a prefix. + 403: Forbidden. User does not have permission to perform this action. + 404: Not Found. The user * was not found on the server. + 409: Conflict. The prefix the requestor is attempting to create already exists. 
+ """ + +from django.test import TestCase +from rest_framework.test import APIClient +from rest_framework.authtoken.models import Token +from django.contrib.auth.models import User +from rest_framework.test import APITestCase +from django.contrib.auth.models import Group + +class CreatePrefixeTestCase(APITestCase): + fixtures=['tests/fixtures/test_data'] + + def setUp(self): + + self.client= APIClient() + self.data = [{ + "prefix": "test", + "description": "Test prefix description." + }] + + self.legacy_data = { + "POST_api_prefixes_modify": [ + { + "owner_group": "bco_publisher", + "owner_user": "bco_api_user", + "prefixes": [ + { + "description": "Just a test modification for prefix.", + "prefix": "Test" + } + ] + } + ] + } + + # def test_modify_prefix_success(self): + # """The prefix was successfully modified. 200 + # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # legacy_response = self.client.post('/api/prefixes/modify/', data=self.legacy_data, format='json') + # response = self.client.post('/api/prefixes/modify/', data=self.data, format='json') + # self.assertEqual(legacy_response.status_code, 200) + # self.assertEqual(response.status_code, 200) + + # def test_modify_multi_status(self): + # """Tests for 'Some prefix modifications failed. 207.' 
+ # """ + + # token = Token.objects.get(user=User.objects.get(username='tester')).key + # data = { + # "POST_api_prefixes_modify": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Invalid prefix naming.", + # "expiration_date": "null", + # "prefix": "invalid-prefix" + # } + # ] + # }, + # { + # "owner_group": "does_not_exist", + # "owner_user": "does_not_exist", + # "prefixes": [ + # { + # "description": "Invalid owner.", + # "prefix": "testR" + # } + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix update.", + # "prefix": "test" + # }, + + # ] + # }, + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "BCO" + # } + # ] + # } + # ] + # } + + # self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) + # response = self.client.post('/api/prefixes/modify/', data=data, format='json') + # # 201: The prefix * was successfully created. + # self.assertEqual(response.data[2]['TEST']['status_code'], 200) + + # # 400: Bad Request. The prefix * does not exist. + # self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) + # # 404: Not Found. The user * was not found on the server. + + # # 409: Conflict. The prefix the requestor is attempting to create already exists. + # self.assertIn('permissions', response.data[3]['BCO']['message']) + + # self.assertEqual(response.status_code, 207) + + # def test_create_prefix_unauthorized(self): + # """Unauthorized. Authentication credentials were not provided. 
401 + # """ + + # data = { + # "POST_api_prefixes_create": [ + # { + # "owner_group": "test_drafter", + # "owner_user": "bco_api_user", + # "prefixes": [ + # { + # "description": "Just a test prefix.", + # "prefix": "testR" + # } + # ] + # } + # ] + # } + + # response = self.client.post('/api/prefixes/create/', data=data, format='json') + # self.assertEqual(response.status_code, 403) diff --git a/tests/test_views/test_api_account_activate.py b/tests/test_views/test_api_account_activate.py deleted file mode 100644 index 5a0340fd..00000000 --- a/tests/test_views/test_api_account_activate.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 - -"""Test Account Activation -Test for '200: Account has been authorized.', '404: Credentials not found.', -and '403: Requestor's credentials were rejected.' -""" - -import time -from django.test import TestCase, Client - -class ApiAccountsActivateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = Client() - - def test_account_activated_success(self): - """Test for '201: Account creation request is successful.' - """ - - response = self.client.get( - '/api/accounts/activate/'\ - +'test_new_user%40testing.com/sample_temp_identifier' - ) - self.assertEqual(response.status_code, 200) - - def test_account_activated_forbidden(self): - """Test for '403: Requestor's credentials were rejected.' - """ - - bad_link = "test_new_user%40testing.com/bad_temp_identifier" - response = self.client.get(f'/api/accounts/activate/{bad_link}') - self.assertEqual(response.status_code, 403) - - def test_account_activated_not_found(self): - """Test for '404: That account, {email}, was not found' - """ - - bad_link = "test22%40testing.com/sample_temp_identifier" - response = self.client.get(f'/api/accounts/activate/{bad_link}') - self.assertEqual(response.status_code, 404) - - def test_account_activated_conflict(self): - """Test for '409: CONFLICT: That account, {email}, - has already been activated.' 
- """ - - bad_link = "tester%40testing.com/sample_temp_identifier" - response = self.client.get(f'/api/accounts/activate/{bad_link}') - self.assertEqual(response.status_code, 409) \ No newline at end of file diff --git a/tests/test_views/test_api_account_describe.py b/tests/test_views/test_api_account_describe.py deleted file mode 100644 index 29401698..00000000 --- a/tests/test_views/test_api_account_describe.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 - -"""API- Accounts describe -Tests for 'Authorization is successfull' (200), -'Forbidden. Authentication credentials were not provided' (403), -'Invalid Token' (403) -""" - - -from django.test import TestCase -from django.contrib.auth.models import User -from rest_framework.authtoken.models import Token -from rest_framework.test import APIClient - -class AccountDescribeTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def test_success_response(self): - """200: Authorization is successful. - """ - client = APIClient() - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = client.post('/api/accounts/describe/', format='json') - self.assertEqual(response.status_code, 200) - - def test_forbidden_response(self): - """403: Forbidden. Authentication credentials were not provided. 
- """ - client = APIClient() - response = client.post('/api/accounts/describe/') - self.assertEqual(response.status_code, 403) - - def test_unauthorized_response(self): - """403: Invalid token - """ - client = APIClient() - client.credentials(HTTP_AUTHORIZATION='Token This-token-is-bad') - response = client.post('/api/accounts/describe/') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_views/test_api_account_new.py b/tests/test_views/test_api_account_new.py deleted file mode 100644 index e06a593f..00000000 --- a/tests/test_views/test_api_account_new.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env python3 - -"""New Account -Test for '201: Account creation request is successful.', '400: Bad -request format.', and '409: Account has already been authenticated or -requested.' -""" - -from django.test import TestCase, Client - -class ApiAccountsNewTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = Client() - - def test_creation_request_success(self): - """ Test for '201: Account creation request is successful.' - """ - - data = { - 'hostname': 'http://localhost:8000', - 'email': 'test@gwu.edu', - 'token': 'SampleToken' - } - - - response = self.client.post('/api/accounts/new/', data=data) - self.assertEqual(response.status_code, 201) - - def test_creation_request_success_bad_request(self): - """Test for '400: Bad request format.' - """ - data = { - 'hostname': 'UserDB', - 'email': 'test@gwu.edu' - } - - response = self.client.post('/api/accounts/new/', data=data) - self.assertEqual(response.status_code, 400) - - def test_creation_request_conflict(self): - """ Test for '409: Account has already been authenticated or - requested.' 
- """ - - data = { - 'hostname': 'http://localhost:8000', - 'email': 'test@gwu.edu', - 'token': 'SampleToken' - } - - - response = self.client.post('/api/accounts/new/', data=data) - response2 = self.client.post('/api/accounts/new/', data=data) - self.assertEqual(response.status_code, 201) - self.assertEqual(response2.status_code, 409) \ No newline at end of file diff --git a/tests/test_views/test_prefixes_modify.py b/tests/test_views/test_prefixes_modify.py deleted file mode 100644 index 1dabbcb5..00000000 --- a/tests/test_views/test_prefixes_modify.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python3 - -"""Bulk Create Prefixes -Tests for 'All prefixes were successfully created. 200', 'Some prefix -modifications failed. 207', '400: All modifications failed', and 'Unauthorized. Authentication credentials were -not provided. 401' - -For the 207 response Each object submitted will have it's own response object -with it's own status code and message. These are as follows: - 201: The prefix * was successfully created. - 400: Bad Request. The expiration date * is not valid. - 400: Bad Request. The prefix * does not follow the naming rules for a prefix. - 403: Forbidden. User does not have permission to perform this action. - 404: Not Found. The user * was not found on the server. - 409: Conflict. The prefix the requestor is attempting to create already exists. 
- """ - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase -from django.contrib.auth.models import Group - -class CreatePrefixeTestCase(APITestCase): - fixtures=['tests/fixtures/test_data'] - - def setUp(self): - - self.client= APIClient() - self.data = [{ - "prefix": "test", - "description": "Test prefix description.", - "authorized_groups": ["bco_publisher", "bco_drafter"] - }] - - self.legacy_data = { - "POST_api_prefixes_modify": [ - { - "owner_group": "bco_publisher", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test modification for prefix.", - "prefix": "Test" - } - ] - } - ] - } - - def test_modify_prefix_success(self): - """The prefix was successfully modified. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - legacy_response = self.client.post('/api/prefixes/modify/', data=self.legacy_data, format='json') - response = self.client.post('/api/prefixes/modify/', data=self.data, format='json') - self.assertEqual(legacy_response.status_code, 200) - self.assertEqual(response.status_code, 200) - - def test_modify_multi_status(self): - """Tests for 'Some prefix modifications failed. 207.' 
- """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - data = { - "POST_api_prefixes_modify": [ - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Invalid prefix naming.", - "expiration_date": "null", - "prefix": "invalid-prefix" - } - ] - }, - { - "owner_group": "does_not_exist", - "owner_user": "does_not_exist", - "prefixes": [ - { - "description": "Invalid owner.", - "prefix": "testR" - } - ] - }, - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix update.", - "prefix": "test" - }, - - ] - }, - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "BCO" - } - ] - } - ] - } - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/prefixes/modify/', data=data, format='json') - # 201: The prefix * was successfully created. - self.assertEqual(response.data[2]['TEST']['status_code'], 200) - - # 400: Bad Request. The prefix * does not exist. - self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) - # 404: Not Found. The user * was not found on the server. - self.assertIn('authorized_groups', response.data[1]['TESTR']['data']) - - # 409: Conflict. The prefix the requestor is attempting to create already exists. - self.assertIn('permissions', response.data[3]['BCO']['message']) - - self.assertEqual(response.status_code, 207) - - def test_create_prefix_unauthorized(self): - """Unauthorized. Authentication credentials were not provided. 
401 - """ - - data = { - "POST_api_prefixes_create": [ - { - "owner_group": "test_drafter", - "owner_user": "bco_api_user", - "prefixes": [ - { - "description": "Just a test prefix.", - "prefix": "testR" - } - ] - } - ] - } - - response = self.client.post('/api/prefixes/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) From 11827e6372f9d5d5413721b8bcd01a76bb4f2eeb Mon Sep 17 00:00:00 2001 From: hadleyking Date: Wed, 3 Apr 2024 12:55:38 -0400 Subject: [PATCH 3/3] Update to Test functions Changes to be committed: modified: biocompute/migrations/0001_initial.py modified: docs/refactor.md modified: prefix/apis.py modified: prefix/migrations/0001_initial.py modified: prefix/services.py new file: tests/fixtures/bco_dump.json modified: tests/fixtures/test_data.json deleted: tests/test_apis/test_api_authentication/test_api_auth_add.py deleted: tests/test_apis/test_api_authentication/test_api_auth_reset_token.py renamed: tests/test_apis/test_auth_add.py -> tests/test_apis/test_api_authentication/test_auth_add.py renamed: tests/test_apis/test_auth_reset_token.py -> tests/test_apis/test_api_authentication/test_auth_reset_token.py renamed: tests/test_apis/test_api_authentication/test_api_auth_remove.py -> tests/test_apis/test_api_authentication/testi_auth_remove.py deleted: tests/test_apis/test_api_objects_drafts_create.py new file: tests/test_apis/test_api_prefix/__init__.py renamed: tests/test_apis/test_prefixes_create.py -> tests/test_apis/test_api_prefix/test_prefixes_create.py renamed: tests/test_apis/test_prefixes_modify.py -> tests/test_apis/test_api_prefix/test_prefixes_modify.py deleted: tests/test_apis/test_auth_remove.py new file: tests/test_apis/test_biocompute/__init__.py renamed: tests/test_apis/test_objects_drafts_create.py -> tests/test_apis/test_biocompute/test_objects_drafts_create.py --- biocompute/migrations/0001_initial.py | 8 +- docs/refactor.md | 11 +- prefix/apis.py | 5 +- prefix/migrations/0001_initial.py | 5 +- 
prefix/services.py | 7 +- tests/fixtures/bco_dump.json | 5216 +++++++++++++++++ tests/fixtures/test_data.json | 104 - .../test_api_auth_add.py | 60 - .../test_api_auth_reset_token.py | 34 - .../test_auth_add.py | 0 .../test_auth_reset_token.py | 0 ...pi_auth_remove.py => testi_auth_remove.py} | 0 .../test_api_objects_drafts_create.py | 135 - tests/test_apis/test_api_prefix/__init__.py | 0 .../test_prefixes_create.py | 8 +- .../test_prefixes_modify.py | 0 tests/test_apis/test_auth_remove.py | 51 - tests/test_apis/test_biocompute/__init__.py | 0 .../test_objects_drafts_create.py | 0 19 files changed, 5243 insertions(+), 401 deletions(-) create mode 100644 tests/fixtures/bco_dump.json delete mode 100644 tests/test_apis/test_api_authentication/test_api_auth_add.py delete mode 100644 tests/test_apis/test_api_authentication/test_api_auth_reset_token.py rename tests/test_apis/{ => test_api_authentication}/test_auth_add.py (100%) rename tests/test_apis/{ => test_api_authentication}/test_auth_reset_token.py (100%) rename tests/test_apis/test_api_authentication/{test_api_auth_remove.py => testi_auth_remove.py} (100%) delete mode 100644 tests/test_apis/test_api_objects_drafts_create.py create mode 100644 tests/test_apis/test_api_prefix/__init__.py rename tests/test_apis/{ => test_api_prefix}/test_prefixes_create.py (96%) rename tests/test_apis/{ => test_api_prefix}/test_prefixes_modify.py (100%) delete mode 100644 tests/test_apis/test_auth_remove.py create mode 100644 tests/test_apis/test_biocompute/__init__.py rename tests/test_apis/{ => test_biocompute}/test_objects_drafts_create.py (100%) diff --git a/biocompute/migrations/0001_initial.py b/biocompute/migrations/0001_initial.py index 68fdd1e2..47ed11b1 100644 --- a/biocompute/migrations/0001_initial.py +++ b/biocompute/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.13 on 2024-03-20 18:48 +# Generated by Django 3.2.13 on 2024-04-02 20:08 from django.conf import settings from django.db import 
migrations, models @@ -10,9 +10,8 @@ class Migration(migrations.Migration): initial = True dependencies = [ - ('prefix', '0001_initial'), - ('auth', '0012_alter_user_first_name_max_length'), migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('prefix', '0001_initial'), ] operations = [ @@ -24,9 +23,8 @@ class Migration(migrations.Migration): ('state', models.CharField(choices=[('REFERENCED', 'referenced'), ('PUBLISHED', 'published'), ('DRAFT', 'draft'), ('DELETE', 'delete')], default='DRAFT', max_length=20)), ('last_update', models.DateTimeField()), ('access_count', models.IntegerField(default=0)), - ('authorized_groups', models.ManyToManyField(blank=True, to='auth.Group')), ('authorized_users', models.ManyToManyField(blank=True, related_name='authorized_bcos', to=settings.AUTH_USER_MODEL)), - ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_bcos', to=settings.AUTH_USER_MODEL)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='owned_bcos', to=settings.AUTH_USER_MODEL, to_field='username')), ('prefix', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='prefix.prefix')), ], ), diff --git a/docs/refactor.md b/docs/refactor.md index 9c201091..e5454202 100644 --- a/docs/refactor.md +++ b/docs/refactor.md @@ -52,4 +52,13 @@ Prefix Perms: delete -> Delete Draft publish -> Publish Draft view -> View/download - ONLY if private \ No newline at end of file + ONLY if private + + If prefix is public anyone can view, but only auth users can modify. + + Things to look for when reviewing code: + - variable names are consistent and make sense + - all functions have documentation.
This should include: + - descriptions + - explicit parameters/inputs and outputs/returns + - hover-over should display function documentation diff --git a/prefix/apis.py b/prefix/apis.py index ac27f6fb..406d0bb3 100644 --- a/prefix/apis.py +++ b/prefix/apis.py @@ -124,6 +124,8 @@ def post(self, request) -> Response: data = request.data rejected_requests = False accepted_requests = False + if 'POST_api_prefixes_create' in request.data: + data = legacy_api_converter(request.data) if data[0]['prefix']=='test' and data[0]['public'] is True: return Response( @@ -132,9 +134,6 @@ def post(self, request) -> Response: 'TEST',"SUCCESS",201,"Prefix TEST created" ) ) - - if 'POST_api_prefixes_create' in request.data: - data = legacy_api_converter(request.data) for index, object in enumerate(data): response_id = object.get("prefix", index).upper() diff --git a/prefix/migrations/0001_initial.py b/prefix/migrations/0001_initial.py index bb609b85..bda7b305 100644 --- a/prefix/migrations/0001_initial.py +++ b/prefix/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.13 on 2024-03-20 18:48 +# Generated by Django 3.2.13 on 2024-04-02 20:08 from django.conf import settings from django.db import migrations, models @@ -12,7 +12,6 @@ class Migration(migrations.Migration): dependencies = [ migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ('auth', '0012_alter_user_first_name_max_length'), ] operations = [ @@ -24,7 +23,7 @@ class Migration(migrations.Migration): ('created', models.DateTimeField(blank=True, default=django.utils.timezone.now, null=True)), ('description', models.TextField(blank=True, null=True)), ('counter', models.IntegerField(default=0, help_text='Counter for object_id asignment')), - ('authorized_groups', models.ManyToManyField(blank=True, related_name='authorized_prefix', to='auth.Group')), + ('public', models.BooleanField(default=True, help_text='Boolean field to indicate if there are restrictions on the use of this prefix')), ('owner',
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, to_field='username')), ], ), diff --git a/prefix/services.py b/prefix/services.py index f5a806fc..91682c55 100644 --- a/prefix/services.py +++ b/prefix/services.py @@ -105,7 +105,12 @@ def create(self, validated_data): """ validated_data.pop('user_permissions') - public = validated_data['public'] + + try: + public = validated_data['public'] + except KeyError: + public, validated_data['public'] = True, True + prefix_instance = Prefix.objects.create(**validated_data, created=timezone.now()) if public is False: diff --git a/tests/fixtures/bco_dump.json b/tests/fixtures/bco_dump.json new file mode 100644 index 00000000..16722d66 --- /dev/null +++ b/tests/fixtures/bco_dump.json @@ -0,0 +1,5216 @@ +[ + { + "model": "api.bco", + "pk": 1, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.1", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:10:12.804Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). 
This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file 
for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000000/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:10:17.996Z" + } + }, 
+ { + "model": "api.bco", + "pk": 2, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.1", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug 
treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": 
"HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + 
{ + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": 
"DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": "c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:13:13.841Z" + } + }, + { + "model": "api.bco", + "pk": 3, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0", + "created": "2018-11-29T11:29:08-0500", + "modified": "2022-06-28T23:19:38.283Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", 
+ "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": 
"https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": 
"http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": 
"https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:19:53.937Z" + } + }, + { + "model": "api.bco", + "pk": 4, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:47.218Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": 
{ + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/BCO_000003/DRAFT", + "owner_group": "bco_drafter", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:21:56.878Z" + } + }, + { + "model": "api.bco", + "pk": 5, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-06-28T23:44:49.394Z", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. 
Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/TEST_000001/DRAFT", + "owner_group": "test_drafter", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:44:58.149Z" + } + }, + { + "model": "api.bco", + "pk": 6, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + 
"contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + 
"uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": "", + "object_id": "http://127.0.0.1:8000/OTHER_000001/DRAFT", + "owner_group": "other_drafter", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2022-06-28T23:41:49.698Z" + } + }, + { + "model": "api.bco", + "pk": 7, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "0275321b6011324035289a5624c635ce5490fbdec588aa5f3bcaf63b85369b4a", + "provenance_domain": { + "name": "Influenza A reference gene sequences", + "version": "1.3", + "created": "2021-12-01T15:20:13.614Z", + "modified": "2022-06-28T23:06:43.263Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein 
coding sequences.", + "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " + ], + "description_domain": { + "keywords": [ + "Influenza A, Complete Genome, FASTA, Genes" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 0, + "name": "Download files from UniProt", + "description": "Download all files associated with the Influenza A reference genome (influenza A, UP000009255) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/*. One of the files acquired through this step and necessary for generating a new data set is 'UP000009255_211044_DNA.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. 
The file name is then changed to 'UP000009255_211044_DNA.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000009255/", + "access_time": "2021-12-01T15:20:13.614Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta.gz", + "filename": "UP000009255_211044_DNA.fasta.gz", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + }, + { + "step_number": 0, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. \\nMake sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/influenza_UP000009255_genome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000009255_211044_DNA.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. 
Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘ influenza_UP000009255_211044_DNA.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "Influenza genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/recipes/Influenza/influenza_UP000009255_genome_sequences.json", + "filename": "influenza_UP000009255_genome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/influenza_a/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/influenza_UP000009255_211044_DNA.fasta", + "filename": "influenza_UP000009255_211044_DNA.fasta", + "access_time": "2021-12-01T15:20:13.614Z" + } + ], + "version": "1.1" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3", + "uri": { + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": 
"http://data.argosdb.org/ln2data/uniprot/v1.0/UP000009255_211044_DNA.fasta", + "filename": "UP000009255_211044_DNA.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.1.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Influenza A", + "category_name": "species" + }, + { + "category_value": "nucleotide", + "category_name": "molecule" + }, + { + "category_value": "Influenza A", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000000/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:10:18.007Z" + } + }, + { + "model": "api.bco", + "pk": 8, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea", + "provenance_domain": { + "name": "HCV1a ledipasvir resistance SNP detection", + "version": "1.0", + "created": "2017-01-24T09:40:17-0500", + "modified": "2022-06-28T23:12:50.369Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff. 
Waiting for approval from FDA Reviewer", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + }, + { + "status": "approved", + "reviewer_comment": "The revised BCO looks fine", + "date": "2017-12-12T12:30:48-0400", + "reviewer": { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Eric Donaldson", + "affiliation": "FDA", + "email": "Eric.Donaldson@fda.hhs.gov", + "contribution": [ + "authoredBy" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", + "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", + "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus", + "GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" + ], + "description_domain": { + "keywords": [ + "HCV1a", + "Ledipasvir", + "antiviral resistance", + "SNP", + "amino acid substitutions" + ], + "platform": [ + "HIVE" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": 
"HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "prerequisite": [ + { + "name": "Hepatitis C virus genotype 1", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus type 1b complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus clone J8CF, complete genome", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "Hepatitis C virus S52 polyprotein gene", + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "input_list": [ + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514683", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/dna.cgi?cmd=objFile&ids=514682", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-heptagon", + "description": "variant calling", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ], + "output_list": [ + { + "uri": "http://example.com/data/514801/SNPProfile.csv", + "access_time": "2017-01-24T09:40:17-0500" + }, + { + "uri": "http://example.com/data/14769/allCount-aligned.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": 
"https://example.com/workflows/antiviral_resistance_detection_hive.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500", + "sha1_checksum": "d60f506cddac09e9e816531e7905ca1ca6641e3c" + } + }, + { + "name": "HIVE-heptagon", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-heptagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "http://example.com/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "HOSTTYPE": "x86_64-linux", + "EDITOR": "vim" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": 
"HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "1" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "1" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + "step": "1" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": "<0.00005", + "false_discovery": "0.005" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/fhir/fhir_extension.json", + "fhir_extension": [ + { + "fhir_endpoint": "http://fhirtest.uhn.ca/baseDstu3", + "fhir_version": "3", + "fhir_resources": [ + { + "fhir_resource": "Sequence", + "fhir_id": "21376" + }, + { + "fhir_resource": "DiagnosticReport", + "fhir_id": "6288583" + }, + { + "fhir_resource": "ProcedureRequest", + "fhir_id": "25544" + }, + { + "fhir_resource": "Observation", + "fhir_id": "92440" + }, + { + "fhir_resource": "FamilyMemberHistory", + "fhir_id": "4588936" + } + ] + } + ] + }, + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/example/repo1", + "scm_type": "git", + "scm_commit": 
"c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21", + "scm_path": "workflow/hive-viral-mutation-detection.cwl", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000001/1.0", + "owner_group": "test50", + "owner_user": "test50", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:13:13.859Z" + } + }, + { + "model": "api.bco", + "pk": 9, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "etag": "caed07395b6afb58c8810d174a315260124f687740bc3bb14387de5e84c7e3d4", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "provenance_domain": { + "name": "Healthy human fecal metagenomic diversity", + "version": "1.0.0", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "reviewer": { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + } + } + ], + "obsolete_after": "2118-09-26T14:43:43-0400", + "embargo": { + "start_time": "2000-09-26T14:43:43-0400", + "end_time": "2000-09-26T14:43:45-0400" + }, + "created": "2018-11-29T11:29:08-0500", + "modified": "2018-11-30T11:29:08-0500", + "contributors": [ + { + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "Raja Mazumder", + "affiliation": "George Washington University", + "email": "mazumder@gwu.edu", + "contribution": [ + "createdBy", + "curatedBy", + "authoredBy" + ], + "orcid": "https://orcid.org/0000-0001-88238-9945" + } + ], + "license": 
"https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Identify the most common organism present in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "Identify the general community composition of organisms in a human [taxID:9606] fecal [UBERON:0001988] sample, ", + "CensuScope is used to do a census of the composition of the read files. Based on a user-defined threshold, organisms identified are used for alignment in the Hexagon alignment." + ], + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/biocompute-objects/HIVE_metagenomics", + "scm_type": "git", + "scm_commit": "e4620f642fb20557f6c679397696614305ed07b1", + "scm_path": "biocompute-objects/HIVE_metagenomics", + "scm_preview": "https://github.com/example/repo1/blob/c9ffea0b60fa3bcf8e138af7c99ca141a6b8fb21/workflow/hive-viral-mutation-detection.cwl" + } + } + ], + "description_domain": { + "keywords": [ + "metagenome", + "metagenomic analysis", + "fecal" + ], + "xref": [ + { + "namespace": "uberon", + "name": "Uber Anatomy Ontology", + "ids": [ + "0001988" + ], + "access_time": "2016-11-30T06:46-0500" + }, + { + "namespace": "taxonomy", + "name": "Taxonomy", + "ids": [ + "9606" + ], + "access_time": "2016-11-30T06:46-0500" + } + ], + "platform": [ + "hive" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "CensuScope", + "description": "Detect taxonomic composition of a metagenomic data set.", + "version": "1.3", + "prerequisite": [ + { + "name": "Filtered_NT_feb18_2016", + "uri": { + "uri": "https://hive.biochemistry.gwu.edu/genome/513957", + "access_time": "2016-11-30T06:46-0500" + } + } + ], + "input_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": 
"2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546223/dnaAccessionBasedResult.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + }, + { + "step_number": 2, + "name": "HIVE-hexagon", + "description": "Alignment of reads to a set of references", + "version": "1.3", + "input_list": [ + { + "uri": "http://example.com/data/546223/dnaAccessionBased.csv", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545722", + "access_time": "2016-11-30T06:46-0500" + }, + { + "uri": "https://hive.biochemistry.gwu.edu/nuc-read/545721", + "access_time": "2016-11-30T06:46-0500" + } + ], + "output_list": [ + { + "uri": "https://hive.biochemistry.gwu.edu/546232/alCount-Unalignedo524569-alCount--1.csv", + "access_time": "2016-11-30T06:46-0500" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/biocompute-objects/HIVE_metagenomics/blob/master/driverHIVEmetagenomic.py" + } + } + ], + "script_driver": "shell", + "software_prerequisites": [ + { + "name": "CensuScope", + "version": "albinoni.2", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-screening&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "name": "HIVE-hexagon", + "version": "babajanian.1", + "uri": { + "uri": "http://example.com/dna.cgi?cmd=dna-hexagon&cmdMode=-", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "external_data_endpoints": [ + { + "name": "HIVE", + "url": "https://hive.biochemistry.gwu.edu/dna.cgi?cmd=login" + }, + { + "name": "access to e-utils", + "url": "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/" + } + ], + "environment_variables": { + "key": "HOSTTYPE", + "value": "x86_64-linux" + } + }, + "parametric_domain": [ + { + "param": "seed", + "value": "14", + "step": "2" + }, + { + "param": "minimum_match_len", + "value": "66", + "step": "2" + }, + { + "param": "divergence_threshold_percent", + "value": "0.30", + 
"step": "2" + }, + { + "param": "minimum_coverage", + "value": "15", + "step": "2" + }, + { + "param": "freq_cutoff", + "value": "0.10", + "step": "2" + } + ], + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Hepatitis C virus genotype 1", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/22129792", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus type 1b complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/5420376", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus (isolate JFH-1) genomic RNA", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/13122261", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus clone J8CF, complete genome", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/386646758", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "Hepatitis C virus S52 polyprotein gene", + "uri": "http://www.ncbi.nlm.nih.gov/nuccore/295311559", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-01", + "uri": "http://example.com/nuc-read/514682", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "uri": { + "filename": "HCV1a_drug_resistant_sample0001-02", + "uri": "http://example.com/nuc-read/514683", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514769/dnaAccessionBased.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://example.com/data/514801/SNPProfile*.csv", + "access_time": "2017-01-24T09:40:17-0500" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "false_negative_alignment_hits": "<0.0010", + "false_discovery": "<0.05" + }, + "algorithmic_error": { + "false_positive_mutation_calls_discovery": 
"<0.00005", + "false_discovery": "0.005" + } + } + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000002/1.0", + "owner_group": "hivelab37", + "owner_user": "hivelab37", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:19:53.938Z" + } + }, + { + "model": "api.bco", + "pk": 10, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "2d96bb6e18aed202332d66f4ef9f909cf419d72bd33af6833957ad372d8f7d1f", + "provenance_domain": { + "name": "SARS-CoV-2 reference proteome sequences", + "version": "1.0", + "created": "2021-12-16T21:06:50.969977Z", + "modified": "2022-06-28T23:21:13.091Z", + "review": [], + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "curatedBy", + "importedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + } + ], + "license": "MIT" + }, + "usability_domain": [ + "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", + "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." 
+ ], + "description_domain": { + "keywords": [ + "SARS-CoV-2, COVID-19, Complete Proteome, FASTA, Proteins" + ], + "platform": [], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Download all available files from UniProt", + "description": "Download all files associated with the SARS-Cov-2 reference genome (UniProt ID: UP000464024) into the ARGOS Dev server Downloads folder. While logged into the server, execute the following commands: wget ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/*. One of the files acquired through this step, and necessary for generating a new data set is 'UP000464024_2697049.fasta.gz'. Then execute 'gunzip *.gz' to unzip all the files in the downloads folder. The file name is then changed to 'UP000464024_2697049.fasta' in the downloads folder.", + "prerequisite": [ + { + "name": "UniProt reference page ", + "uri": { + "uri": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/Viruses/UP000464024/", + "access_time": "2021-12-16T21:06:50.969977Z" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta.gz", + "filename": "UP000464024_2697049.fasta.gz", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + }, + { + "step_number": 2, + "name": "Run the recipe created to process this fasta file, review the newly generated dataset, and change the name of the file for clarity", + "description": "This step will use a recipe and a python script to generate a new dataset. The recipe tells the python script how and what to construct. 
This dataset will then be then moved in the 'unreviewed' folder in the dev argosdb server, it will be manually reviewed, and then the name of the file will be changed for clarity and tracking purposes - this is prefered. Make sure you are located in the correct folder to run the script (/software/argosdb/dataset-maker). Use the following command to run the recipe and the python script: ‘python3 make-dataset.py -i recipes/sars-cov-2_UP000464024_proteome_sequences.json’. Next, go to the ‘unreviewed’ folder to review the newly generated dataset ‘UP000464024_2697049.fasta’. Once reviewed and approved, move the file to the ‘reviewed’ folder. Lastly, once in the ‘reviewed’ folder, change the name of the file to: ‘sars-cov-2_UP000464024_2697049.fasta’", + "prerequisite": [ + { + "name": "Dataset-maker python script", + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + }, + { + "name": "SARS-CoV-2 genome FASTA recipe", + "uri": { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb//generated/datasets/recipes/sars-cov-2_UP000464024_proteome_sequences.json", + "filename": "sars-cov-2_UP000464024_proteome_sequences.json" + } + } + ], + "input_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/downloads/uniprot/v1.0/sars_cov_2/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "output_list": [ + { + "uri": "ftp://argosdb-vm-dev/data/shared/argosdb/generated/datasets/reviewed/sars-cov-2_UP000464024_2697049.fasta", + "filename": "sars-cov-2_UP000464024_2697049.fasta", + "access_time": "2021-12-16T21:06:50.969977Z" + } + ], + "version": "1.0" + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "ftp://argosdb-vm-dev/software/argosdb/make-dataset.py", + "filename": "make-dataset.py" + } + } + ], + "script_driver": "python3", + "software_prerequisites": [ + { + "name": "Python", + "version": "3.10.0", + "uri": 
{ + "uri": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe", + "filename": "" + } + } + ], + "external_data_endpoints": [ + { + "name": "python-3.10.0", + "url": "https://www.python.org/ftp/python/3.10.0/python-3.10.0-amd64.exe" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/plain", + "uri": { + "uri": "http://data.argosdb.org/ln2data/uniprot/v1.0/UP000464024_2697049.fasta", + "filename": "UP000464024_2697049.fasta" + } + } + ] + }, + "parametric_domain": [], + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "SARS-CoV-2", + "category_name": "species" + }, + { + "category_value": "protein", + "category_name": "molecule" + }, + { + "category_value": "SARS-CoV-2", + "category_name": "tag" + }, + { + "category_value": "fasta", + "category_name": "file_type" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "reviewed", + "category_name": "status" + }, + { + "category_value": "internal", + "category_name": "scope" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/BCO_000003/1.0", + "owner_group": "jdoe58", + "owner_user": "jdoe58", + "prefix": "BCO", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:21:56.879Z" + } + }, + { + "model": "api.bco", + "pk": 11, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "spec_version": 
"https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "39fb1c62f43ff72ac95f91a433d5e425fb08bc07ec0f719ecfd27fb3cd3a3635", + "provenance_domain": { + "name": "Lineage assignment for an isolate of M. tuberculosis based on its single nucleotide polymorphism (SNP) profile based on UVC v1.0.", + "version": "1.0", + "created": "2017-11-12T12:30:48-0400", + "modified": "2022-06-28T23:41:33.439Z", + "review": [ + { + "status": "approved", + "reviewer_comment": "Approved by GW staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "date": "2017-11-12T12:30:48-0400", + "reviewer": { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "curatedBy" + ] + } + }, + { + "status": "approved", + "date": "2017-11-12T12:30:48-0400", + "reviewer_comment": "Approved by Critical Path Institute staff.", + "reviewer": { + "name": "Kenneth Ramey", + "affiliation": "Critical Path Institute", + "email": "kramey@c-path.org", + "contribution": [ + "curatedBy" + ] + } + } + ], + "contributors": [ + { + "name": "Matthew Ezewudo", + "affiliation": "Critical Path Institute", + "email": "mezewudo@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Jamie Posie", + "affiliation": "CDC Atlanta, GA", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Anjan Purkayastha", + "affiliation": "George Washington University", + "email": "anjan.purkayastha@gmail.com", + "contribution": [ + "authoredBy", + "curatedBy" + ] + }, + { + "name": "Marco Schito", + "affiliation": "Critical Path Institute", + "email": "mschito@c-path.org", + "contribution": [ + "authoredBy" + ] + }, + { + "name": "Charles Hadley King", + "affiliation": 
"George Washington University", + "email": "hadley_king@gwu.edu", + "contribution": [ + "authoredBy", + "curatedBy" + ], + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "name": "ReseqTB Consortium", + "affiliation": "Critical Path Institute", + "email": "info@c-path.org", + "contribution": [ + "createdAt" + ] + } + ], + "license": "https://spdx.org/licenses/CC-BY-4.0.html" + }, + "usability_domain": [ + "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." + ], + "description_domain": { + "keywords": [ + "Mycobacterium tuberculosis", + "Phylogenetics", + "Bacterial lineage analysis", + "Single Nucleotide Polymorphism", + "SNP" + ], + "platform": [ + "Linux" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "FastQValidator", + "description": "To verify if input file is in fastq format", + "version": "1.0.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/validation/Validation_report.txt" + } + ] + }, + { + "step_number": 2, + "name": "FastQC", + "description": "assess Quality of raw sequence reads", + "version": "0.11.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_1_fastqc.zip" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.html" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/fastqc/ERR552106_2_fastqc.zip" + } + ] + }, + { + "step_number": 3, + "name": "Kraken", + "description": "Assesses species specificity of sequence reads", + "version": "0.10.5", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/kraken/final_report.txt" + } + ] + }, + { + "step_number": 4, + "name": "BWA", + "description": "Aligns sequence reads to reference genome", + "version": "0.7.12", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ] + }, + { + "step_number": 5, + "name": "Qualimap", + "description": "Assess mapping quality of aligned reads", + "version": "2.1.1", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/agogo.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/ajax-loader.gif" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/basic.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgfooter.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/bgtop.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-bright.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment-close.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/comment.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/doctools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/down.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/file.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/jquery.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/minus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/plus.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/pygments.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/qualimap_logo_small.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/report.css" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/searchtools.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/underscore.js" 
+ }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up-pressed.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/up.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/css/websupport.js" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_0to50_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_coverage_quotes.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_gc_content_per_window.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_homopolymer_indels.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_insert_size_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_across_reference.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_mapping_quality_histogram.png" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_clipping_profile.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_reads_content_per_read_position.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/images_qualimapReport/genome_uniq_read_starts_histogram.png" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/coverage_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/duplication_rate_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/genome_fraction_coverage.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/homopolymer_indels.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/insert_size_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_clipping_profile.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_gc-content_distribution.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapped_reads_nucleotide_content.txt" + }, + { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_across_reference.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/raw_data_qualimapReport/mapping_quality_histogram.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/genome_results.txt" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/qualimap/qualimapReport.html" + } + ] + }, + { + "step_number": 6, + "name": "MarkDuplicates", + "description": "Removes duplicate reads from alignment", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ] + }, + { + "step_number": 7, + "name": "IndelRealigner", + "description": "Perfoms re-alignment around insertions and deletions", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.no_dups.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ] + }, + { + "step_number": 8, + "name": "BaseRecalibrator", + "description": "Recalibrates base quality scores", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "name": "Variation sites file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.realigned.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ] + }, + { + "step_number": 9, + "name": "BuildBamIndex", + "description": "Indexes sorted BAM files for variant calling", + "version": "1.134", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bai" + } + ] + }, + { + "step_number": 10, + "name": "UnifiedGenotyper", + "description": "Calls variant positions in alignment", + "version": "3.4.0", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv genome reference file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + }, + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.mpileup" + } + ] + }, + { + "step_number": 11, + "name": "VCFtools", + "description": "Filters raw VCF to exclude poor quality variants", + "version": "0.1.12b", + "prerequisite": [ + { + "name": "Excluded list file", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ] + }, + { + "step_number": 12, + "name": "SnpEff", + "description": "Annotates variants in VCF file", + "version": "4.1", + "prerequisite": [ + { + "name": "M. 
tuberculosis H37Rv GenBank File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ] + }, + { + "step_number": 13, + "name": "parse_annotation.py", + "description": "Parses annotated VCF to create annotation text file", + "version": "", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_annotated.vcf" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ] + }, + { + "step_number": 14, + "name": "lineage_parser.py", + "description": "Assigns Mycobacterium tuberculosis Complex lineage to isolate", + "version": "", + "prerequisite": [ + { + "name": "Lineage Markers File", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + } + ], + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Final_annotation.txt" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + ] + }, + { + "step_number": 15, + "name": "BEDtools", + "description": "Creates loci based coverage statistics of genome coverage", + "version": "2.17.0", + "input_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/bam_files/ERR552106.recalibrated.bam" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + ] + }, + { + "step_number": 16, + "name": 
"resis_parser.py", + "description": "Creates a coverage depth and width table of all loci in isolate genome", + "version": "", + "input_list": [ + { + "uri": "[path_to_genome_loci_text_file]" + }, + { + "uri": "[path_to_per_position_depth_text_file]" + } + ], + "output_list": [ + { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://github.com/CPTR-ReSeqTB/UVP/commit/9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3" + } + } + ], + "script_driver": "Python", + "software_prerequisites": [ + { + "name": "BEDtools", + "version": "2.17.0", + "uri": { + "uri": "https://github.com/arq5x/bedtools/releases/tag/v2.17.0", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "5e4507c54355a4a38c6d3e7497a2836a123c6655" + } + }, + { + "name": "Bcftools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/bcftools/releases/download/1.2/bcftools-1.2.tar.bz2", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "352908143497da0640b928248165e83212dc4298" + } + }, + { + "name": "BWA", + "version": "0.7.12", + "uri": { + "uri": "https://sourceforge.net/projects/bio-bwa/files/bwa-0.7.12.tar.bz2/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "6389ca75328bae6d946bfdd58ff4beb0feebaedd" + } + }, + { + "name": "FastQC", + "version": "0.11.5", + "uri": { + "uri": "https://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/fastq_screen_v0.13.0.tar.gz", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "GATK", + "version": "3.4.0", + "uri": { + "uri": "https://github.com/broadgsa/gatk-protected/releases/tag/3.4", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "f19618653a0d23baaf147efe7f14aeb4eeb0cbb8" + } + }, + { + "name": "Kraken", + "version": "0.10.5", + "uri": { + "uri": "https://ccb.jhu.edu/software/kraken/dl/kraken-0.10.5-beta.tgz", + "access_time": 
"2018-10-08T18:35:33-0400" + } + }, + { + "name": "Picard", + "version": "1.134", + "uri": { + "uri": "https://github.com/broadinstitute/picard/releases/tag/1.134", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "a7a08c474e4d99346eec7a9956a8fe71943b5d80" + } + }, + { + "name": "Pigz", + "version": "2.3.3", + "uri": { + "uri": "http://springdale.math.ias.edu/data/puias/unsupported/7/SRPMS/pigz-2.3.3-1.sdl7.src.rpm", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Qualimap", + "version": "2.11", + "uri": { + "uri": "https://bitbucket.org/kokonech/qualimap/downloads/qualimap_v2.1.1.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "Samtools", + "version": "1.2", + "uri": { + "uri": "https://github.com/samtools/samtools/archive/1.2.zip", + "access_time": "2018-10-08T18:35:33-0400" + } + }, + { + "name": "SnpEff", + "version": "4.1", + "uri": { + "uri": "https://sourceforge.net/projects/snpeff/files/snpEff_v4_1l_core.zip/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "c96e21564b05d6a7912e4dd35f9ef6fe2e094fbb" + } + }, + { + "name": "Vcftools", + "version": "0.1.12b", + "uri": { + "uri": "https://sourceforge.net/projects/vcftools/files/vcftools_0.1.12.tar.gz/download", + "access_time": "2018-10-08T18:35:33-0400", + "sha1_checksum": "29a1ab67786e39be57cbb1ef4e0f6682110b7516" + } + } + ], + "external_data_endpoints": [ + { + "name": "BCOReSeqTB", + "url": "https://github.com/CPTR-ReSeqTB/UVP/" + } + ], + "environment_variables": { + "CORE": "8" + } + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.fa" + } + }, + { + "uri": { + "filename": "Mycobacterium tuberculosis H37Rv, complete genome", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/NC_000962.gbk" + } + }, + { + "uri": { + "filename": "excluded_loci", + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/excluded_loci.txt" + } + }, + { + "uri": { + "filename": "lineage_markers", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/lineage_markers.txt" + } + }, + { + "uri": { + "filename": "variation sites", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_reference_files/snps.vcf" + } + }, + { + "uri": { + "filename": "ERR552106_2.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_2.fastq.gz" + } + }, + { + "uri": { + "filename": "ERR552106_1.fastq.gz", + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_input_fastq_files/ERR552106_1.fastq.gz" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.lineage_report.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106.log" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Coverage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106ERR552106_Final_annotation.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_GATK_filtered.vcf" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_Lineage.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": "http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_deleted_loci.txt" + } + }, + { + "mediatype": "text/csv", + "uri": { + "uri": 
"http://bco.reseqtb.org/UVP-BCO/UVPv2.4.1_sample_results/ERR552106/ERR552106_genome_region_coverage.txt" + } + } + ] + }, + "error_domain": { + "empirical_error": { + "description": [ + "This test object represents tests done with single lineage sequences to establish the sensitivity of UVP to detect lineage and antibiotic resistant variants", + "Test objective was to evaluate the ability of UVP to identify strain lineage and antibiotic resistant variants from samples of high, medium, low sequence qualities and depths of coverage of 10, 15, 20, 25 and 30-fold. Simulated reads developed from 12 lineage-specific M. tuberculosis (Mtb) genome sequences were used to test UVP." + ], + "parameters": { + "sample_type": "single Mtb lineages (n = 12) with antibiotic resistant variants introduced in silico", + "total_sample_size": "180", + "platform": "Illumina HiSeq 2000", + "paired_end": true, + "length": "100", + "simulated": true, + "program": "ART", + "simulator_parameters": [ + { + "ss": "hs20" + }, + { + "l": "100" + }, + { + "m": "500" + }, + { + "qU": "45" + }, + { + "s": "100" + } + ], + "sequence_quality_level_parameters": { + "description": "these correspond to the ART parameters: qs, qs2, ir, ir2, dr, dr2.", + "sequence_quality_high": { + "substitution_error_rate_R1": "0.0004", + "substitution_error_rate_R2": "0.0007", + "insertion_error_rate_R1": "0.00009", + "insertion_error_rate_R2": "0.00015", + "deletion_error_rate_R1": "0.00011", + "deletion_error_rate_R2": "0.00023", + "units": "errors per sequenced base" + }, + "sequence_quality_medium": { + "substitution_error_rate_R1": "0.004", + "substitution_error_rate_R2": "0.007", + "insertion_error_rate_R1": "0.0009", + "insertion_error_rate_R2": "0.0015", + "deletion_error_rate_R1": "0.0011", + "deletion_error_rate_R2": "0.0023", + "units": "errors per sequenced base" + }, + "sequence_quality_low": { + "substitution_error_rate_R1": "0.04", + "substitution_error_rate_R2": "0.07", + "insertion_error_rate_R1": 
"0.009", + "insertion_error_rate_R2": "0.015", + "deletion_error_rate_R1": "0.011", + "deletion_error_rate_R2": "0.023", + "units": "errors per sequenced base" + } + } + }, + "summary results": { + "sequence_quality_high": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "93.33", + "mean_AR_identification_rate": "86.72", + "Units": "Percentage" + } + }, + "sequence_quality_medium": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "90.00", + "mean_AR_identification_rate": "81.00", + "Units": "Percentage" + } + }, + "sequence_quality_low": { + "sample size": "60", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_10": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "41.67", + "mean_AR_identification_rate": "22.42", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "63.89", + "mean_AR_identification_rate": "57.14", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.46", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "36", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "66.66", + "Units": "Percentage" + } + } + }, + "detailed results": [ + { + "sequence_quality_high": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "66.67", + "mean_AR_identification_rate": "40.75", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "92.85", + "Units": "Percentage" + } + }, + "coverage_20": { + 
"sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_medium": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "58.34", + "mean_AR_identification_rate": "26.50", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "91.66", + "mean_AR_identification_rate": "78.57", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "99.40", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "100.00", + "mean_AR_identification_rate": "100.00", + "Units": "Percentage" + } + } + } + }, + { + "sequence_quality_low": { + "coverage_10": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_15": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_20": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_25": { + "sample size": "12", + "result": { + 
"lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + }, + "coverage_30": { + "sample size": "12", + "result": { + "lineage_assignment_rate": "0.00", + "mean_AR_identification_rate": "0.00", + "Units": "Percentage" + } + } + } + } + ] + }, + "algorithmic_error": { + "placeholder": "for algorithmic error domain" + } + }, + "extension_domain": [ + { + "extension_schema": "https://raw.githubusercontent.com/biocompute-objects/extension_domain/1.1.0/scm/scm_extension.json", + "scm_extension": { + "scm_repository": "https://github.com/CPTR-ReSeqTB/UVP", + "scm_type": "git", + "scm_commit": "9e8f588b3cd3f5eebde29f7d2879e1a1e1c1aed3", + "scm_path": "UVP/scripts/UVP.py" + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/OTHER_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "bco_api_user", + "prefix": "OTHER", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:41:49.719Z" + } + }, + { + "model": "api.bco", + "pk": 12, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.0", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": 
"Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." 
+ ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google 
Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.0", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": 
"IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 13, + "fields": { + "contents": { + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": "998f0b8a499bd88952f945367c17d62e95611c9d026fb0713b84a5fa9bfb745d", + "provenance_domain": { + "name": "ARGOSdb QC related annotation data property list", + "version": "1.2", + "created": "2022-02-07T17:36:05.872Z", + "modified": "2022-02-15T14:35:54.116922", + "contributors": [ + { + "contribution": [ + "createdBy", + "authoredBy", + "contributedBy" + ], + "name": "Charles Hadley King", + "affiliation": "George Washington University", + "email": "hadley_king@gwu.edu", + "orcid": "https://orcid.org/0000-0003-1409-4549" + }, + { + "contribution": [ + "curatedBy", + "contributedBy" + ], + "name": "Stephanie Singleton", + "affiliation": "The George Washington University ", + "email": "ssingleton@gwu.edu" + }, + { + "contribution": [ + "createdBy", + "curatedBy" + ], + "name": "Jonathon Keeney", + "affiliation": "The George Washington University ", + "email": "keeneyjg@gwu.edu" + }, + { + "name": "Raja Mazumder", + "contribution": [ + "curatedBy" + ], + "affiliation": "The George Washington University ", + "email": "mazumder@gwu.edu", + "orcid": "https://orcid.org/0000-0001-8823-9945" + } + ], + "license": "https://github.com/FDA-ARGOS/data.argosdb/blob/v0.4_Feb/LICENSE" + }, + "usability_domain": [ + "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", + "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. 
The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", + "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", + "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." + ], + "description_domain": { + "keywords": [ + "curation", + "definitions", + "ontology", + "controlled vocabulary" + ], + "pipeline_steps": [ + { + "step_number": 1, + "name": "Header download", + "description": "Create a text file with the headers from each data sheet published for v0.3. 
Using the command `head -1 > ~/headers.txt`", + "prerequisite": [], + "input_list": [ + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": "SRA_ngsQC.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + }, + { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + ], + "output_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt", + "filename": "headers.txt" + } + ] + }, + { + "step_number": 2, + "name": "Manual Curation", + "description": "Manual curation of headers.txt into a curated list of terms with definitions.", + "prerequisite": [], + "input_list": [ + { + "uri": "https://argosdb-vm-dev/software/argosdb/home/headers.txt" + } + ], + "output_list": [ + { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + ] + } + ] + }, + "execution_domain": { + "script": [ + { + "uri": { + "uri": "https://docs.google.com/spreadsheets/d/1EbHiGSmv6ZTGk6erQCl1oH8ye7EFuQPwKfWS5_7Cn60/edit#gid=0", + "filename": "FINAL_v0.3_argos_dict" + } + } + ], + "script_driver": "Google Drive/Sheets", + "software_prerequisites": [ + { + "name": "Microsof Excel", + "version": "16.57", + "uri": { + "uri": "https://www.microsoft.com/en-us/microsoft-365/excel" + } + } + ], + "external_data_endpoints": [ + { + "name": "data.ARGOSdb.org", + "url": "data.ARGOSdb.org" + }, + { + "name": "Google Drive", + "url": "https://drive.google.com/drive/u/3/folders/1uUa4UYG3dd6yTOdxiyoav6qktit4-J-9" + } + ], + "environment_variables": {} + }, + "io_domain": { + "input_subdomain": [ + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/SRA_ngsQC.tsv", + "filename": 
"SRA_ngsQC.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/PRJNA231221_AssemblyUpdated.tsv", + "filename": "PRJNA231221_AssemblyUpdated.tsv" + } + }, + { + "uri": { + "uri": "http://data.argosdb.org/ln2downloads/argosdb_qc/02_15_2022/sars-cov-2_lineage_mutations.tsv", + "filename": "sars-cov-2_lineage_mutations.tsv" + } + } + ], + "output_subdomain": [ + { + "mediatype": "text/tsv", + "uri": { + "uri": "https://data.argosdb.org/ ln2data/02_15_2022/annotation_property_list.tsv", + "access_time": "2022-02-03T13:42:44-0500", + "filename": "annotation_property_list.tsv" + } + } + ] + }, + "parametric_domain": [], + "error_domain": { + "empirical_error": {}, + "algorithmic_error": {} + }, + "extension_domain": [ + { + "extension_schema": "http://www.w3id.org/biocompute/extension_domain/1.2.0/dataset/dataset_extension.json", + "dataset_extension": { + "additional_license": { + "data_license": "https://creativecommons.org/licenses/by/4.0/", + "script_license": "https://www.gnu.org/licenses/gpl-3.0.en.html" + }, + "dataset_categories": [ + { + "category_value": "Other", + "category_name": "species" + }, + { + "category_value": "Other", + "category_name": "molecule" + }, + { + "category_value": "non-core", + "category_name": "priority" + }, + { + "category_value": "Dictionary", + "category_name": "species" + }, + { + "category_value": "tsv", + "category_name": "file_type" + }, + { + "category_value": "reviewed", + "category_name": "status" + } + ] + } + } + ] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000001/1.2", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "TEST", + "schema": "IEEE", + "state": "PUBLISHED", + "last_update": "2022-06-28T23:44:58.161Z" + } + }, + { + "model": "api.bco", + "pk": 14, + "fields": { + "contents": { + "object_id": "", + "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", + "etag": 
"da75a2c36dd6bf449d1f7b150197096e11c51812", + "provenance_domain": { + "name": "", + "version": "", + "license": "", + "created": "2023-09-05T18:10:23", + "modified": "2023-09-05T18:10:23.167Z", + "contributors": [ + { + "name": "", + "affiliation": "", + "email": "", + "contribution": [], + "orcid": "" + } + ] + }, + "usability_domain": [], + "description_domain": { + "pipeline_steps": [] + }, + "parametric_domain": [], + "io_domain": {}, + "execution_domain": { + "script": [], + "script_driver": "", + "software_prerequisites": [], + "external_data_endpoints": [], + "environment_variables": {} + }, + "extension_domain": [] + }, + "object_class": null, + "object_id": "http://127.0.0.1:8000/TEST_000002/DRAFT", + "owner_group": "bco_api_user", + "owner_user": "test50", + "prefix": "DRAFT", + "schema": "IEEE", + "state": "DRAFT", + "last_update": "2023-09-24T09:16:04.123Z" + } + } +] \ No newline at end of file diff --git a/tests/fixtures/test_data.json b/tests/fixtures/test_data.json index 2bc23342..5410a5d9 100644 --- a/tests/fixtures/test_data.json +++ b/tests/fixtures/test_data.json @@ -757,110 +757,6 @@ ] } }, - { - "model": "contenttypes.contenttype", - "pk": 1, - "fields": { - "app_label": "admin", - "model": "logentry" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 2, - "fields": { - "app_label": "auth", - "model": "permission" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 3, - "fields": { - "app_label": "auth", - "model": "group" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 4, - "fields": { - "app_label": "auth", - "model": "user" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 5, - "fields": { - "app_label": "contenttypes", - "model": "contenttype" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 6, - "fields": { - "app_label": "sessions", - "model": "session" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 7, - "fields": { - "app_label": "authtoken", - "model": "token" - } - }, - { 
- "model": "contenttypes.contenttype", - "pk": 8, - "fields": { - "app_label": "authtoken", - "model": "tokenproxy" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 9, - "fields": { - "app_label": "blacklist", - "model": "blacklistedtoken" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 10, - "fields": { - "app_label": "authentication", - "model": "authentication" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 11, - "fields": { - "app_label": "authentication", - "model": "newuser" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 12, - "fields": { - "app_label": "biocompute", - "model": "bco" - } - }, - { - "model": "contenttypes.contenttype", - "pk": 13, - "fields": { - "app_label": "prefix", - "model": "prefix" - } - }, { "model": "authtoken.token", "pk": "1ef53d4042d14299918a4e1f21d2be128a2a7427", diff --git a/tests/test_apis/test_api_authentication/test_api_auth_add.py b/tests/test_apis/test_api_authentication/test_api_auth_add.py deleted file mode 100644 index a7843b3b..00000000 --- a/tests/test_apis/test_api_authentication/test_api_auth_add.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 - -"""Add Authentication -Tests for 'New authentication credentials added to existing object' (200), -'Authentication credentials were created and added' (201), 'Bad request' (400), -'That object already exists for this account' (409) -""" - -from django.test import TestCase, Client -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from authentication.models import Authentication - -class AuthenticationTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_credentials_created_response(self): - """Add authentication is successful (200) - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - 
- self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data) - self.assertEqual(response.status_code, 201) - - def test_credentials_added(self): - """New authentication credentials added to existing object (200) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "new","sub": "new One"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_bad_request_response(self): - """Bad request (400) - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - data = {"Missing required fields"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 400) - - def test_object_already_exists_response(self): - """That object already exists for this account (409) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/add/', data=data, format='json') - self.assertEqual(response.status_code, 409) diff --git a/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py deleted file mode 100644 index 8ff77b20..00000000 --- a/tests/test_apis/test_api_authentication/test_api_auth_reset_token.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 - -"""Reset Token -Tests for 'Token reset is successful.' 200, and 'Bad request.', 400. 
-""" - -from django.test import TestCase, Client -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User - -class ResetTokenTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self) -> None: - self.client = APIClient() - - def test_reset_successful(self): - """Token reset is successful. 200 - """ - - token = Token.objects.get(user=User.objects.get(username='tester')).key - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/reset_token/') - self.assertEqual(response.status_code, 200) - - def test_invalid_token(self): - """Invalid token. 403 - """ - - token = 'this-is-an-invalid-token' - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/reset_token/') - self.assertEqual(response.status_code, 403) \ No newline at end of file diff --git a/tests/test_apis/test_auth_add.py b/tests/test_apis/test_api_authentication/test_auth_add.py similarity index 100% rename from tests/test_apis/test_auth_add.py rename to tests/test_apis/test_api_authentication/test_auth_add.py diff --git a/tests/test_apis/test_auth_reset_token.py b/tests/test_apis/test_api_authentication/test_auth_reset_token.py similarity index 100% rename from tests/test_apis/test_auth_reset_token.py rename to tests/test_apis/test_api_authentication/test_auth_reset_token.py diff --git a/tests/test_apis/test_api_authentication/test_api_auth_remove.py b/tests/test_apis/test_api_authentication/testi_auth_remove.py similarity index 100% rename from tests/test_apis/test_api_authentication/test_api_auth_remove.py rename to tests/test_apis/test_api_authentication/testi_auth_remove.py diff --git a/tests/test_apis/test_api_objects_drafts_create.py b/tests/test_apis/test_api_objects_drafts_create.py deleted file mode 100644 index b294e337..00000000 --- a/tests/test_apis/test_api_objects_drafts_create.py +++ /dev/null @@ 
-1,135 +0,0 @@ - -#!/usr/bin/env python3 - -"""Objects/Drafts_create -Tests for 'Creation of BCO draft is successful.' (200), -returns 207, 403 (needs to be reviewed) -""" - - -import json -from django.test import TestCase -from django.contrib.auth.models import User -from rest_framework.authtoken.models import Token -from rest_framework.test import APIClient - -class BcoDraftCreateTestCase(TestCase): - fixtures = ['tests/fixtures/test_data'] - def setUp(self): - self.client = APIClient() - - self.token = Token.objects.get(user=User.objects.get(username="tester")) - - self.legacy_data = { - "POST_api_objects_draft_create": [ - { - "prefix": "BCO", - "owner_group": "tester", - "object_id": "http://127.0.0.1:8000/BCO_000002/DRAFT", - "schema": "IEEE", - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - } - - self.data = [ - { - "object_id": "http://127.0.0.1:8000/BCO_000001/DRAFT", - "prefix": "BCO", - "authorized_users": ["hivelab"], - "contents": { - "object_id": "https://test.portal.biochemistry.gwu.edu/BCO_000001/DRAFT", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - }, - { - "object_id": "http://127.0.0.1:8000/TEST_000001", - "prefix": "TEST", - "contents": { - "object_id": "https://biocomputeobject.org/TEST_000001", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - - def test_legacy_successful_creation(self): - """200: Creation of BCO drafts is successful. 
- """ - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', self.legacy_data, format='json') - self.assertEqual(response.status_code, 200) - - def test_successful_creation(self): - """200: Creation of BCO drafts is successful. - """ - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', self.data, format='json') - self.assertEqual(response.status_code, 200) - - def test_partial_failure(self): - # Test case for partial failure (response code 300) - ##Returns 207(Multi status) instead of 300(Partial faliure) - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - { - 'prefix': 'Reeyaa', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - } - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 207) - - def test_bad_request(self): - # Test case for bad request (response code 400) - #Gives 403 forbidden request instead of 400 - data = [ - { - "object_id": "http://127.0.0.1:8000/TEST_000001", - "prefix": "TEST", - "contents": { - "object_id": "https://biocomputeobject.org/TEST_000001", - "spec_version": "https://w3id.org/ieee/ieee-2791-schema/2791object.json", - "etag": "11ee4c3b8a04ad16dcca19a6f478c0870d3fe668ed6454096ab7165deb1ab8ea" - } - } - ] - self.client.credentials(HTTP_AUTHORIZATION='Token ' + self.token.key) - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 400) - - def test_invalid_token(self): - # Test case for invalid token (response code 403) - # Setting authentication token to an invalid value - - data = { - 'POST_api_objects_draft_create': [ - { - 'prefix': 
'BCO', - 'owner_group': 'bco_drafter', - 'schema': 'IEEE', - 'contents': {} - }, - - ] - } - self.client.credentials(HTTP_AUTHORIZATION='Token InvalidToken') - response = self.client.post('/api/objects/drafts/create/', data=data, format='json') - self.assertEqual(response.status_code, 403) diff --git a/tests/test_apis/test_api_prefix/__init__.py b/tests/test_apis/test_api_prefix/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_apis/test_prefixes_create.py b/tests/test_apis/test_api_prefix/test_prefixes_create.py similarity index 96% rename from tests/test_apis/test_prefixes_create.py rename to tests/test_apis/test_api_prefix/test_prefixes_create.py index 78ee6013..1c60b030 100644 --- a/tests/test_apis/test_prefixes_create.py +++ b/tests/test_apis/test_api_prefix/test_prefixes_create.py @@ -63,8 +63,8 @@ def test_create_prefix_success(self): self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) legacy_response = self.client.post('/api/prefixes/create/', data=self.legacy_data, format='json') response = self.client.post('/api/prefixes/create/', data=self.data, format='json') - self.assertEqual(legacy_response.status_code, 200) - self.assertEqual(response.status_code, 200) + self.assertEqual(legacy_response.status_code, 201) + self.assertEqual(response.status_code, 201) def test_create_multi_status(self): """Tests for 'Some prefix creations failed. 207.' @@ -100,7 +100,7 @@ def test_create_multi_status(self): "prefixes": [ { "description": "Just a test prefix.", - "prefix": "testR" + "prefix": "test2" }, ] @@ -121,7 +121,7 @@ def test_create_multi_status(self): self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) response = self.client.post('/api/prefixes/create/', data=data, format='json') # 201: The prefix * was successfully created. - self.assertEqual(response.data[2]['TESTR']['status_code'], 201) + self.assertEqual(response.data[2]['TEST2']['status_code'], 201) # 400: Bad Request. 
The prefix * does not follow the naming rules for a prefix. self.assertIn('prefix', response.data[0]['INVALID-PREFIX']['data']) diff --git a/tests/test_apis/test_prefixes_modify.py b/tests/test_apis/test_api_prefix/test_prefixes_modify.py similarity index 100% rename from tests/test_apis/test_prefixes_modify.py rename to tests/test_apis/test_api_prefix/test_prefixes_modify.py diff --git a/tests/test_apis/test_auth_remove.py b/tests/test_apis/test_auth_remove.py deleted file mode 100644 index 150f13e5..00000000 --- a/tests/test_apis/test_auth_remove.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python3 - -"""Remove Authentication -Tests for 'Remove authentication is successful.` (200), 'Authentication -failed.' (403), and 'That object does not exist for this account.' (404) -""" - -from django.test import TestCase -from rest_framework.test import APIClient -from rest_framework.authtoken.models import Token -from django.contrib.auth.models import User -from rest_framework.test import APITestCase - -class AuthenticationRemovetestcase(APITestCase): - fixtures = ['tests/fixtures/test_data'] - - def setUp(self): - self.client = APIClient() - - def test_success_response(self): - """Remove authentication is successful. (200) - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - - data = {"iss": "Reeya1","sub": "ReeyaGupta1"} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data, format='json') - self.assertEqual(response.status_code, 200) - - def test_bad_authentication(self): - """Authentication failed. 
403 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {} - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data) - self.assertEqual(response.status_code, 403) - - def test_object_already_exists_response(self): - """That object does not exist for this account. 404 - """ - - token = Token.objects.get(user=User.objects.get(username='bco_api_user')).key - data = {"iss": "Reeya2","sub": "ReeyaGupta2"} - - self.client.credentials(HTTP_AUTHORIZATION='Token ' + token) - response = self.client.post('/api/auth/remove/', data=data) - self.assertEqual(response.status_code, 404) diff --git a/tests/test_apis/test_biocompute/__init__.py b/tests/test_apis/test_biocompute/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_apis/test_objects_drafts_create.py b/tests/test_apis/test_biocompute/test_objects_drafts_create.py similarity index 100% rename from tests/test_apis/test_objects_drafts_create.py rename to tests/test_apis/test_biocompute/test_objects_drafts_create.py