From 9f9e23799f0d3667e8c6d20c76b007c6d7615b3a Mon Sep 17 00:00:00 2001 From: Kiran Sen Date: Mon, 22 Jul 2024 17:31:28 -0400 Subject: [PATCH 1/4] BCO scoring changes --- biocompute/services.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/biocompute/services.py b/biocompute/services.py index 1512502..95cb415 100644 --- a/biocompute/services.py +++ b/biocompute/services.py @@ -608,7 +608,7 @@ def bco_score(bco_instance: Bco) -> Bco: if "usability_domain" not in contents: bco_instance.score = 0 return bco_instance - + try: usability_domain_length = sum(len(s) for s in contents['usability_domain']) score = {"usability_domain_length": usability_domain_length} @@ -616,6 +616,28 @@ def bco_score(bco_instance: Bco) -> Bco: score = {"usability_domain_length": 0} usability_domain_length = 0 - bco_instance.score = usability_domain_length - + # Calculate the base score + base_score = usability_domain_length + + # Apply the field length modifier + field_length_modifier = 1.2 + base_score *= field_length_modifier + + # Check for the existence of the error domain + error_domain_exists = "error_domain" in contents + if error_domain_exists: + base_score += 5 + + # Apply the parametric object multiplier + parametric_object_count = len(contents.get('parametric_objects', [])) + parametric_object_multiplier = 1.1 + base_score *= (parametric_object_multiplier ** parametric_object_count) + + # Add score for each reviewer object (up to 5) + reviewer_object_count = min(5, len(contents.get('reviewer_objects', []))) + base_score += reviewer_object_count + + # Finalize the score + bco_instance.score = base_score + return bco_instance From 7c615c4ae90f569ecdcdc2a67055bac5a78b9153 Mon Sep 17 00:00:00 2001 From: hadleyking Date: Fri, 12 Jul 2024 08:11:35 -0400 Subject: [PATCH 2/4] Add New Production Deployment md Changes to be committed: deleted: docs/bco_scores.json modified: docs/deployment/productionDeployment.md new file: docs/newProductionInstance.md --- docs/bco_scores.json | 13216 ---------------------- docs/deployment/productionDeployment.md | 31 + docs/newProductionInstance.md | 1 + 3 files changed, 32 insertions(+), 13216 deletions(-) delete mode 100644 docs/bco_scores.json create mode 100644 docs/newProductionInstance.md diff --git a/docs/bco_scores.json b/docs/bco_scores.json deleted file mode 100644 index 9705f00..0000000 --- a/docs/bco_scores.json +++ /dev/null @@ -1,13216 +0,0 @@ -[ - { - "object_id": "https://biocomputeobject.org/BCO_000283/1.0", - "usability_domain": [ - "Until the present day, the majority of cancer genomic have been focusing on identifying the tumor gene and the pathways involving in tumor development. Despite of this being a huge success, there is still a little knowledge of why cancer patients with similar cancer driver genes may result in different disease outcome and/or drug responses. Therefore, there is a need of understanding deeper into this problem. First, it is essential to identify the genes that cause the cancer, or the cancer-driver genes. Therefore, the completion of The Cancer Genome Atlas (TCGA) and other large scale genomic project are important because those projects will provide a critical and essential data to identify driver mutation that lead to cancer. Although it a fast-growing project, the TCGA is far from completed because of the mutation diversity and it is difficult to increase the size of the datasets. A complementary approach toward that goal is to integrate cancer mutation profiles and increase the statistical power of analysis. Moreover, information on the structure of the proteins coded by genes is checked to see the enrichment in cancer mutation in specific regions.", - "In this publication, the authors introduce the extension of an e-Driver that use information on three-dimensional structures of the mutated proteins to identify specific structures. The algorithm analyzed if these structural feature are enriched in cancer somatic mutation and can become candidates of cancer-driver genes. The authors specifically pay attention on the protein-protein interaction (PPI) interfaces since a lot of cancer-driver genes are located in the important region of the PPI network. Here, the authors identified PPi interfaces in a total of 103 genes. 32 of these are well-known cancer-driver genes. The function of the remaining 71 still have to be verified experimentally. The authors also showed that depending on which interface or protein region is altered, tumors apparently driven by the same cancer gene may have different outcomes. ", - "The result assembling a data set of 5,989 tumors from 23 cancer types from the TCGA showed that the average number of missense mutation per sample is highly variable among cancer types, with melanoma is the highest (429 mutations per sample) and thyroid carcinoma is the lowest (11 mutations per sample). Mutations from 868,508 cancer datasets are distributed randomly across the proteome with 30% of mutations in structures and 6% in PPI interfaces (https://doi.org/10.1371/journal.pcbi.1004518.g001). In 103 interface driver genes in the Pan-cancer analysis, there is a huge overlap between the genes identified in this analysis and lists of the known cancer genes (https://doi.org/10.1371/journal.pcbi.1004518.g002) (https://doi.org/10.1371/journal.pcbi.1004518.g003). The result also emphasizes the differences and similarities across related driver genes. The analysis of 71 interface driver genes that are identified as not cancer-driver genes determine their potential roles in cancer. Furthermore, they also have function which is related to immunity", - "The raw data and the algorithm can be download from (http://github.com/eduardporta/e-Driver). The 3 level mutation data can be download from TCGA portal (https://tcga-data.nci.nih.gov) for 5,989 tumor samples that belongs to 23 different cancer types. 18,651 protein structures were identified from PDB (2014) for the analysis of residues implicated with PPI interfaces. The complete dataset containing PPI structures and models are from Interactome3D. e-Driver is used to identify interfaces that are enriched in somatic missense mutations,", - "This is the first time that 3D PPI interfaces have been used to identify genes across large cancer datasets. The analysis showed that cancer driver genes, such as TP35, HRAS, PIK3CA or EGFR can find relevant genes and interaction interfaces alteration is a common pathogenic mechanism of cancer somatic mutation. The authors also found that tumors with mutations in the same driver gene can have different behavior and outcomes, depend on the PPI interface affected by the mutation. This research has focused on the analysis and interpretation of of missense mutation. However, there are other types of variations that still can act as cancer drivers and have a huge impact on the outcomes of the patients.", - "There is no parameter that need to be changed" - ], - "score": { - "usability_domain_length": 4381 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000280/0.1.0", - "usability_domain": [ - "Tempora is a tool for creating cell lineage trajectories using single cell RNA sequencing (scRNA-seq) data. Many tools exist for creating lineage trajectories (Monocle, TSCAN, Slingshot, etc) along a so called pseudotemporal scale. The scale is arranged using a variety of methods such as using a minimum spanning tree (MST) method that arranges cell clusters along an axis according to their transcriptome similarity, determined from the scRNA-seq data, such that the expression differences (\u201ctime\u201d) between all clusters is as small as possible. This and other methods solidly assume that the differences in gene expression are correlated to where along the time-path of a particular lineage each cell exists. Tempora partially removes this assumption by incorporating temporal data collected during time-course scRNA-seq experiments. Tempora\u2019s novel innovation over existing time course tools like Waddinton-OT and CSHMM is creating enriched, redundancy reduced, cluster-level pathways that use time series data to predict temporal directions between cell types. Fields like immunology and developmental biology are very interested in determining cell lineages and predicting trajectories to determine the systemic differences (and manipulating those systems) between progenitors and differentiated cells. Tempora describes initial scRNA-seq data clean-up procedures and two workflows: generating cell lineage trajectories and determining temporally related pathways. This BCO will focus on the cell lineage trajectory workflow using a human skeletal muscle myoblast dataset. ", - "Paper: https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1008205#sec011", - "GitRepository: https://github.com/BaderLab/Tempora", - "Condensed Worflow: Tempora takes cleaned and annotated scRNA-seq data and uses gene set variation analysis (GSVA) to create enriched pathway profiles for each predetermined cell-type cluster. These profiles then undergo principle component analysis to select the most relevant pathways for each cluster. The reduced profiles are then used to construct a mutual information network between cell clusters. Each cell cluster is then assigned a temporal score according to its cell composition from each time point in the data\u2019s series. The temporal scores are then used to assign direction (from earlier to later) to the edges assigned to each cluster. The final output is viewed as an inferred lineage trajectory map with cell clusters as nodes connected by arrows representing the flow of time in cell lineage.Condensed Worflow: Tempora takes cleaned and annotated scRNA-seq data and uses gene set variation analysis (GSVA) to create enriched pathway profiles for each predetermined cell-type cluster. These profiles then undergo principle component analysis to select the most relevant pathways for each cluster. The reduced profiles are then used to construct a mutual information network between cell clusters. Each cell cluster is then assigned a temporal score according to its cell composition from each time point in the data\u2019s series. The temporal scores are then used to assign direction (from earlier to later) to the edges assigned to each cluster. The final output is viewed as an inferred lineage trajectory map with cell clusters as nodes connected by arrows representing the flow of time in cell lineage." - ], - "score": { - "usability_domain_length": 3342 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000517/v-2.0.2", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Sites (MCW) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The current dataset has 4997 proteins with O-GlcNAc modifications. Out of which 3262 proteins have no site information where as 1735 proteins have 6722 sites.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Human O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s human protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to human species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding human UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a total of 2,237 proteins had an O-GlcNAc score of 3 or under, meaning that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation.", - "The log file - https://data.glygen.org/ln2data/releases/data/v-1.12.3/logs/human_proteoform_glycosylation_sites_o_glcnac_mcw.log contains the entries that were excluded from the output file along with the reason for exclusion.", - "The orginal unprocessed dataset in excel format can be found at https://figshare.com/articles/The_human_O-GlcNAcome_database/12443495/4" - ], - "score": { - "usability_domain_length": 3011 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000517/v-2.1.1", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Sites (MCW) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The current dataset has 4997 proteins with O-GlcNAc modifications. Out of which 3262 proteins have no site information where as 1735 proteins have 6722 sites.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Human O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s human protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to human species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding human UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a total of 2,237 proteins had an O-GlcNAc score of 3 or under, meaning that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation.", - "The log file - https://data.glygen.org/ln2data/releases/data/v-1.12.3/logs/human_proteoform_glycosylation_sites_o_glcnac_mcw.log contains the entries that were excluded from the output file along with the reason for exclusion.", - "The orginal unprocessed dataset in excel format can be found at https://figshare.com/articles/The_human_O-GlcNAcome_database/12443495/4" - ], - "score": { - "usability_domain_length": 3011 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000325/3.0", - "usability_domain": [ - "A virus genome was used as a basis for developing diagnostic tests using polymerase chain reactions (PCR), identifying circulating strains, discovering mutations that could alter transmission rates, pathogenicity, drug research, culminating in vaccine development (Mercer & Salit, 2021).", - "There are two main methods of assembling the genome: reference-based and de novo assembly. The reference-based assembly method is used when the genome of the target organism is available, and the reads are aligned to the reference. In this approach, the focus is on Accepting indels and multiple nucleotides in the structure and gene variant information (Chen et al., 2017). In de novo assembly, consensus sequences are generated without reference genomes by using heuristics to maintain single/ multiple nucleotide variants and indels (Li, 2012).", - "Most of the pipelines developed for the genome assembly of SARS-CoV-2 follow a Reference-based strategies, including Viralrecon, V-pipe, SIGNAL, and CLC. These are only reference-based assemblies and may miss critical information regarding multiple nucleotide variants or indels since they are forced to have the same structure as the reference.", - "However, de novo assemblies are aimed at capturing the natural structure By maintaining any indel or multiple nucleotide variants found in the sequences. Up until now, there has been no published pipeline for combining reference-based and a de novo assembly strategy for the SARS-CoV-2 genome. Combining these strategies would be beneficial a virus sample could be analyzed to gather valuable and reliable information since would recover natural genome information such as indels and multiple nucleotide variants utilize the reference genome for guiding and organizing the de novo assembly as well sequences.", - "Pipcov combines both the advantages of de novo and reference-based assembly strategies. It provides assembly for variant identification of SARS-CoV-2 viruses", - "PipeCoV got smaller and more variable values for the consensus length (average of 29,754 bp) as it combines do novo and reference-based strategies for the assembly. About genome coverage, PipeCov showed an average genome coverage of 97.01%.", - "Another important quality metric is number of N\u2019s, in high-quality assemblies must be less than 1% of the total consensus length and the length of the consensus must be greater than 29,000 bp Briones et al. (2020) PipeCoV generated three consensus sequences with the length between 29.000 bp and 28.837 bp and 64 consensus sequences with more than 300 N\u2019s, that delivers a high quality consensus compared to other pipelines. PipeCoV benchmarked with those 120 paired-end datasets. All data relating to the 120 sequenced samples can be found in the https://www.ncbi.nlm.nih.gov/ pmc/articles/PMC9013232/bin/peerj-10-13300-s001.xlsx PipecoV workflow https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9013232/figure/ fig-1/" - ], - "score": { - "usability_domain_length": 2899 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000633/v-2.0.2", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Sites (MCW) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Fruitfly O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s fruitfly protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to fruitfly species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding fruitfly UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2476 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000633/v-2.1.1", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Sites (MCW) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Fruitfly O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s fruitfly protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to fruitfly species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding fruitfly UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2476 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000631/v-2.0.2", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Sites (MCW) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s mouse protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to mouse species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding mouse UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000631/v-2.1.1", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Sites (MCW) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s mouse protein master list of UniProtKB canonical accessions to ensure all of the proteins belong to mouse species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding mouse UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000632/v-2.0.2", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Sites (MCW) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s rat protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to rat species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding rat UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2449 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000632/v-2.1.1", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Sites (MCW) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO. The dataset also contains other information such as O-GlcNAc score, the count and the techniques used to determine the O-GlcNAc modification and the peptide sequence from the -5 to +5 postion of the modified residue. The dataset also containes disclaimer and notes where the position of sites in publication and in the fasta sequence varies.", - "The dataset is contributed from The O-GlcNAc Database maintained by Stephanie Olivier's lab ", - "Wulff-Fuentes E, Berendt RR, Massman L, Danner L, Malard F, Vora J, Kahsay R, and Olivier-Van Stichelen S, The Mouse O-GlcNAcome Database and Meta-Analysis. Scientific Data 8, 25 (2021) ", - "QC check - The python script is used to process each protein entry of the database table and perform quality control (QC). In the QC checks, the reported UniProtKB accessions in the database are matched with the GlyGen\u2019s rat protein masterlist of UniProtKB canonical accessions to ensure all of the proteins belong to rat species. Next, the O-GlcNAc site data that contained the amino acid residue and its position are mapped to the GlyGen\u2019s fasta sequence for the corresponding rat UniProtKB canonical proteins. The amino acid residues and positions that don't match with the fasta sequence of the canonical protein were flagged and excluded from the table. Similarly, the protein entries that don't have any site information are also excluded. The entries that passed all the quality control steps are exported as a final csv dataset file whereas the excluded entries are exported in a log file with the reasons for exclusion for further manual verification.", - "DISCLAIMER - For each entry, an O-GlcNAc score was attributed to each O-GlcNAcylated protein, reflecting the confidence in the protein\u2019s O-GlcNAcylation identification. The score combined the number of articles and techniques used and the availability of some O-GlcNAcylation sites. Interestingly, a O-GlcNAc score of 3 or under, means that (1) no sites were identified, (2) only one technique was used, and (3) 1-2 articles were associated with the entry. Often identified from large-scale proteomics studies, at least an extra-validation step would be highly recommended to confirm their O-GlcNAcylation." - ], - "score": { - "usability_domain_length": 2449 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000277/4.0", - "usability_domain": [ - "Olduvai protein domains (formerly \"DUF1220\") are the most duplicated protein coding sequence in the human genome (https://doi.org/10.1371/journal.pbio.0020207). They are expressed in many tissues, including strongly in the brain (https://doi.org/10.1126/science.1127980). The copy number of Olduvai domains has been linked to increased brain size (https://doi.org/10.1007/s00429-014-0814-9), and performance on IQ tests (https://doi.org/10.1007/s00439-014-1489-2), as well as neurodiverse states like autism (https://doi.org/10.1371/journal.pgen.1004241).", - "Precise evaluation of copy number in humans has been difficult to achieve, as ratiometric approaches fail to identify small changes when the total number is the ~300 range, as in humans. Read depth approaches using short read WGS data are promising, but most existing pipelines mask repeats altogether, and those that do measure copy number do it relative to the gene. However, Olduvai domains are known to exist in different number and kind within a family of genes, rendering this approach inoperative. The pipeline described here is used to identify the copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods, designed with DUF1220 copy number in mind.", - "Approximately 25 individuals were randomly chosen from each of the American (Utah -- Northern and Western European ancestry; CEU), Nigerian (Yoruba; YRI), Han Chinese (Beijing; CHB), Japanese (Tokyo; JPT), Mexican-American (Los Angeles; MXL), Colombian (Medellin; CLM), Puerto Rican (Puerto Rico; PUR), African-American (Southwest US; ASW), Luhya (Webuye, Kenya; LWK), Han Chinese (South China; CHS), Tuscan (Toscana, Italia; TSI), Spanish (Iberian populations; IBS), Finnish (Finland; FIN), and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. No intermediate files were generated because the commands were run executed as a pipe at the command line, so T:/dev/tmpfs was used for the file IOs in the Description Domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" - ], - "score": { - "usability_domain_length": 2287 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000452/1.0", - "usability_domain": [ - "Olduvai protein domains (formerly \"DUF1220\") are the most duplicated protein coding sequence in the human genome (https://doi.org/10.1371/journal.pbio.0020207). They are expressed in many tissues, including strongly in the brain (https://doi.org/10.1126/science.1127980). The copy number of Olduvai domains has been linked to increased brain size (https://doi.org/10.1007/s00429-014-0814-9), and performance on IQ tests (https://doi.org/10.1007/s00439-014-1489-2), as well as neurodiverse states like autism (https://doi.org/10.1371/journal.pgen.1004241).", - "Precise evaluation of copy number in humans has been difficult to achieve, as ratiometric approaches fail to identify small changes when the total number is the ~300 range, as in humans. Read depth approaches using short read WGS data are promising, but most existing pipelines mask repeats altogether, and those that do measure copy number do it relative to the gene. However, Olduvai domains are known to exist in different number and kind within a family of genes, rendering this approach inoperative. The pipeline described here is used to identify the copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods, designed with DUF1220 copy number in mind.", - "Approximately 25 individuals were randomly chosen from each of the American (Utah -- Northern and Western European ancestry; CEU), Nigerian (Yoruba; YRI), Han Chinese (Beijing; CHB), Japanese (Tokyo; JPT), Mexican-American (Los Angeles; MXL), Colombian (Medellin; CLM), Puerto Rican (Puerto Rico; PUR), African-American (Southwest US; ASW), Luhya (Webuye, Kenya; LWK), Han Chinese (South China; CHS), Tuscan (Toscana, Italia; TSI), Spanish (Iberian populations; IBS), Finnish (Finland; FIN), and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. No intermediate files were generated because the commands were run executed as a pipe at the command line, so T:/dev/tmpfs was used for the file IOs in the Description Domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" - ], - "score": { - "usability_domain_length": 2287 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000301/1.0", - "usability_domain": [ - "Astrocytes are the most abundant glial cells in the brain. Over the past few decades, the classical view of astrocytes being merely supportive cells in the nervous system has dramatically extended to include the important roles of astrocytes in neuronal, blood vessel and glial function (https://doi.org/10.1016/j.tcb.2016.01.003). Astrocytes are known to respond to neurotransmitters released from neurons by increasing intracellular calcium. These calcium events can further propagate to neighboring cells through gap junctions, causing the release of gliotransmitters which bind to neuronal receptors and eventually facilitate synaptic transmission (https://doi.org/10.1016/j.neuron.2014.02.007). This crosstalk between astrocytes and neurons via calcium signaling indicates the active role of astrocytes in neural signaling and synaptic plasticity (DOI: 10.1109/TMBMC.2022.3142621). ", - "Given that astrocyte calcium signaling is critical to healthy regulation of neuronal activity and physiology, it is imperative to be able to study intercellular communication in astrocytes at the network level. While there are existing methods for analyzing calcium events in astrocytes, these tools are limited to evaluating calcium signaling at an individual cellular level. Additionally, these tools are often based on using pre-defined regions of interest (ROIs), which may offer bias in analysis. ", - "Astral is a novel tool that allows for analysis of the functional interactions between astrocytes mediated by their calcium signaling at the network level. Its unique pipeline quantifies astrocytic calcium events without the need for pre-defined ROIs, and analyzes intercellular propagation based on live-cell imaging. Astral consists of a core-processing pipeline for detection and quantification of Ca2+ events, as well as a visualization tool for data quality control. Altogether, Astral is powerful and novel tool that enables a novel approach in studying astrocyte-neuronal interactions at the network level ", - "https://doi.org/10.3389/fncel.2021.689268", - "This BCO provides information on Astral's core processing pipeline for the detection and quantification of calcium events (not the second-part visualization tool for data quality control)." - ], - "score": { - "usability_domain_length": 2232 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000088/1.0.36", - "usability_domain": [ - " A0392 Gene expression signature profile (measured by calculating the cosinor correlation of the sample expression profile to a template, (the mean expression profile of 44 tumors with a known good clinical outcome), and by determining the 70-gene molecular profile of the sample (Low Risk, High Risk) for breast cancer recurrence. The genes are AKAP2 (UPKB:Q9Y2D5), AP2B1 (UPKB:P63010), BBC3 (UPKB:Q9BXHI), CCN4 (UPKB:O95388), Clone HQ0310 PRO0310p1 (HQ0310), COL4A2 (UPKB:P08572), CMC2 (UPKB:Q9NRP2), ALDH4A1 (UPKB:P30038), DTL (UPKB:Q9NZJ0), DCK (UPKB:P27707), MCM6 (UPKB:Q14566), ESM1 (UPKB:Q9NQ30), ECI2 (UPKB:O75521), EXT1 (UPKB:Q16394), Expressed sequence tag (EST6), Expressed sequence tag (EST7), Expressed sequence tag (EST8), Expressed sequence tag (EST9), Expressed sequence tag (EST10), Expressed sequence tag (EST11), Expressed sequence tag (EST12), Expressed sequence tag (EST13), Expressed sequence tag (EST14), Expressed sequence tag (EST15), Expressed sequence tag (EST1), Expressed sequence tag (EST3), Expressed sequence tag (EST4), Expressed sequence tag (EST5), Expressed sequence tag (EST2), FGF18 (UPKB:O76093), CCNE2 (UPKB:O96020), GSTPK, GSTM3 (UPKB:P21266), GMPS (UPKB:P49915), GNAZ (UPKB:P19086), HEC, CENPA (UPKB:P49450), Homo sapiens mRNA cDNA DKFZp434C0931 (from clone DKFZp434C0931) partial cds (DKFZp434C0931mRNA, Hypothetical protein DKFZp564D0462 (DKFZp564D0462), Hypothetical protein FLJ11190 (FLJ11190), Hypothetical protein FLJ11354 (FLJ11354), Hypothetical protein FLJ12443 (FLJ12443), Hypothetical protein FLJ22477 (FLJ22477), IGFBP5 (UPKB:P24593), MELK (UPKB:Q14680), MMP9 (UPKB:P14780), CFFM4 (UPKB:Q9GZW8), MP1, NMU (UPKB:P48645), ORC6 (UPKB:Q9Y5N6), PLAAT3 (UPKB:P53816), ECT2 (UPKB:Q9H8V3), PRC1 (UPKB:O43663), RAB6B (UPKB:Q9NRW1), RFC4 (UPKB:P35249), Ser-Thr protein kinase related to the myotonic dystrophy protein kinase, SCUBE2 (UPKB:Q9NQ36), SM20, SERF1A (UPKB:O75920), SLC2A3 (UPKB:P11169), OXCT1 (UPKB:P55809), TSPYL5 (UPKB:Q86VY4), TMEFF1 (UPKB:Q8IYR6), EBF4 (UPKB:Q9BQW3), TGFB3 (UPKB:P10600), UCHL5 (UPKB:Q9Y5K5), FLT1 (UPKB:P17948). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 2145 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000303/3.0", - "usability_domain": [ - "Apoptosis is an important process for maintaining homeostasis, eliminating cells that have been damaged beyond repair, and preventing cancer. If there is a problem with apoptosis, it could lead to uncontrolled cell proliferation and development of a tumor (https://pubmed.ncbi.nlm.nih.gov/18846107/).", - "Proteins of the B-cell lymphoma-2 (Bcl-2) family are known to regulate programmed cell death through the mitochondrial apoptosis pathway. They are therefore considered pro-survival proteins given that a major characteristic of cancer is its ability to avoid programmed cell death (https://pubmed.ncbi.nlm.nih.gov/10398099/). This makes them a therapeutic target for drug discovery (https://pubmed.ncbi.nlm.nih.gov/28735187/). When there is intracellular stress, the apoptotic homeostasis is controlled by the activator and sensitizer BH3 (Bcl-2 homology 2)-only proteins. Changes in expression levels of these proteins can lead to an imbalance. More specifically, overexpression can cause oncogenic effects (https://pubmed.ncbi.nlm.nih.gov/24355989/). Normally, the Bcl-2 family proteins will bind to the BH3 motif of pro-apoptotic proteins to create a network of protein-protein interactions, but any dysfunction will result in the cancer cells to evade cell death. This study emphasizes the importance of future experimental research on the protein network between Bcl-2 and BH3 within a breast cancer context.", - "Bioinformatic approaches were done in order to link -omics with structural data. There was no change in the parameters from the default. This BCO represents the steps taken to identify protein interaction partners of the Bcl-2 family members that contain the BH3 motif in breast cancer samples. This was done by first retrieving the experimentally known Bcl-2 family interactions from the human Integrated Interaction Database (IID), and then the interaction list was filtered to only include those proteins containing the BH3 motif. Out of the 560 protein-protein interactions that were collected, 295 of them were selected as possible BH3-containing proteins. Out of the 295 proteins, 282 were identified as BH-3 only." - ], - "score": { - "usability_domain_length": 2132 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000059/1.0", - "usability_domain": [ - "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).\nThe current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.\nBioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.\nThe dataset is imported and contributed by BioMuta and processed by GlyGen. If you use this dataset please provide proper attribution to BioMuta and GlyGen.\nNote the mutation shown in GlyGen are annotated based on passing of one or more filter criteria.\n- Filter criteria 1 - Positive Patient's Frequency Ratio \\u2265 1.0%,\n- Filter criteria 2 - Presence of mutation in number of cancer types \\u2265 3 cancer types,\n- Filter criteria 3 - Positive Patient's count \\u2265 10 patients,\n- Filter criteria 4 - Mutation reported and annotated in number of databases,\n- Filter criteria 5 - Mutation mined from the literature using literature mining tools (human_protein_mutation_literature.csv),\nIf there is also a germline mutation reported in dbSNP, the rsid is also shown\nThese filter criteria annotations are shown on the front end under the column Annotation Name\nAlso if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.\nWhen mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen,\nTwo different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.\nOn the GlyGen interface, in Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amino acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." - ], - "score": { - "usability_domain_length": 2003 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000004/v-2.0.2", - "usability_domain": [ - "The Human Glycosyltransferases dataset contains list of human [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. The dataset was created by: Proteomes, gene and protein lists were retrieved from UniProtKB/Swiss-Prot. The full set of proteins comprising the human proteome was retrieved with the following search terms: keyword:\"Complete proteome [KW-0181]\" (now removed from UniProtKB) AND reviewed:yes AND organism:\"Homo sapiens (Human) [9606]\". To retrieve the list of all human glycosyltransferases, the full list of human proteins were filtered using Gene Ontology (GO) terms, with molecular function GO term, GO:0016757 transferase activity, transferring glycosyl groups, each corresponding to a unique gene. To ensure the gene list was comprehensive with respect to available annotations, entries were cross-referenced with two external glycomics databases: the Carbohydrate Active Enzymes database (CAZY, http://www.cazy.org/), the Consortium for Functional Glycomics (CFG) functional glycomics gateway (http://www.functionalglycomics.org/), BRENDA (https://www.brenda-enzymes.org/index.php) and Enzyme Portal (https://www.ebi.ac.uk/enzymeportal/). The list was then filtered to remove any duplicate references resulting from cross-database redundancy or secondary accessions. Entries not already contained in the GO-derived glycosyltransferase gene list were added to the list. InterPro and Pfam accessions were then retrieved for all genes currently in the list. For GlyGen, GTs that were not part of glycan metabolism were removed to produce a manually curated list of glycosyltransferase. If you use this dataset please provide proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 1985 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000004/v-2.1.1", - "usability_domain": [ - "The Human Glycosyltransferases dataset contains list of human [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. The dataset was created by: Proteomes, gene and protein lists were retrieved from UniProtKB/Swiss-Prot. The full set of proteins comprising the human proteome was retrieved with the following search terms: keyword:\"Complete proteome [KW-0181]\" (now removed from UniProtKB) AND reviewed:yes AND organism:\"Homo sapiens (Human) [9606]\". To retrieve the list of all human glycosyltransferases, the full list of human proteins were filtered using Gene Ontology (GO) terms, with molecular function GO term, GO:0016757 transferase activity, transferring glycosyl groups, each corresponding to a unique gene. To ensure the gene list was comprehensive with respect to available annotations, entries were cross-referenced with two external glycomics databases: the Carbohydrate Active Enzymes database (CAZY, http://www.cazy.org/), the Consortium for Functional Glycomics (CFG) functional glycomics gateway (http://www.functionalglycomics.org/), BRENDA (https://www.brenda-enzymes.org/index.php) and Enzyme Portal (https://www.ebi.ac.uk/enzymeportal/). The list was then filtered to remove any duplicate references resulting from cross-database redundancy or secondary accessions. Entries not already contained in the GO-derived glycosyltransferase gene list were added to the list. InterPro and Pfam accessions were then retrieved for all genes currently in the list. For GlyGen, GTs that were not part of glycan metabolism were removed to produce a manually curated list of glycosyltransferase. If you use this dataset please provide proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 1985 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000024/v-2.0.2", - "usability_domain": [ - "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).", - "The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.", - "BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.", - "The dataset is imported and contributed by BioMuta and processed by GlyGen. If you use this dataset please provide proper attribution to BioMuta and GlyGen. ", - "Note the mutation shown in GlyGen are annotated based on passing of one or more filter criterias.", - "Filter criteria 1 - Positive Patient's Frequency Ratio \u2265 1.0%", - "Filter criteria 2 - Presence of mutation in number of cancer types \u2265 3 cancer types", - "Filter criteria 3 - Positive Patient's count \u2265 10 patients", - "Filter criteria 4 - Mutation reported and annotated in number of databases", - "Filter criteria 5 - Mutation minned from the literature using literaure mining tools (human_protein_mutation_literature.csv)", - "If there is also a germline mutation reported in dbSNP, the rsid is also shown", - "These filter criteria annotations are shown on the front end under the column Annotation Name", - "Also if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.", - "When mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen", - "Two different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.", - "On the GlyGen interface, in Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amio acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." - ], - "score": { - "usability_domain_length": 1958 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000284/3.0", - "usability_domain": [ - "Apoptosis is an important process for maintaining homeostasis, eliminating cells that have been damaged beyond repair, and preventing cancer. If there is a problem with apoptosis, it could lead to uncontrolled cell proliferation and development of a tumor. ", - "Proteins of the B-cell lymphoma-2 (Bcl-2) family are known to regulate programmed cell death through the mitochondrial apoptosis pathway. They are therefore considered pro-survival proteins given that a major characteristic of cancer is its ability to avoid programmed cell death. This makes them a therapeutic target for drug discovery. When there is intracellular stress, the apoptotic homeostasis is controlled by the activator and sensitizer BH3 (Bcl-2 homology 2)-only proteins. Changes in expression levels of these proteins can lead to an imbalance. More specifically, overexpression can cause oncogenic effects. Normally, the Bcl-2 family proteins will bind to the BH3 motif of pro-apoptotic proteins to create a network of protein-protein interactions, but any dysfunction will result in the cancer cells to evade cell death. This study emphasizes the importance of future experimental research on the protein network between Bcl-2 and BH3 within a breast cancer context.", - "Bioinformatic approaches were done in order to link -omics with structural data. There was no change in the parameters from the default. This BCO represents the steps taken to identify protein interaction partners of the Bcl-2 family members that contain the BH3 motif in breast cancer samples. This was done by first retrieving the experimentally known Bcl-2 family interactions from the human Integrated Interaction Database (IID), and then the interaction list was filtered to only include those proteins containing the BH3 motif. Out of the 560 protein-protein interactions that were collected, 295 of them were selected as possible BH3-containing proteins. Out of the 295 proteins, 282 were identified as BH-3 only. " - ], - "score": { - "usability_domain_length": 1958 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000024/v-2.1.1", - "usability_domain": [ - "The Human Cancer Mutation dataset contains human [taxid:9606] mutation data from BioMuta database (https://hive.biochemistry.gwu.edu/biomuta).", - "The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer.", - "BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and NCBI RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates.", - "The dataset is imported and contributed by BioMuta and processed by GlyGen. If you use this dataset please provide proper attribution to BioMuta and GlyGen. ", - "Note the mutation shown in GlyGen are annotated based on passing of one or more filter criterias.", - "Filter criteria 1 - Positive Patient's Frequency Ratio \u2265 1.0%", - "Filter criteria 2 - Presence of mutation in number of cancer types \u2265 3 cancer types", - "Filter criteria 3 - Positive Patient's count \u2265 10 patients", - "Filter criteria 4 - Mutation reported and annotated in number of databases", - "Filter criteria 5 - Mutation minned from the literature using literaure mining tools (human_protein_mutation_literature.csv)", - "If there is also a germline mutation reported in dbSNP, the rsid is also shown", - "These filter criteria annotations are shown on the front end under the column Annotation Name", - "Also if the same mutation in the same cancer type on a given protein is reported in ICGC and TCGA then mutations from TCGA are only shown.", - "When mutations from TCGA are not available but are present in ICGC for the same protein and cancer type, ICGC mutations are shown in GlyGen", - "Two different DO child terms are merged into one single entry showing the parent cancer DO term from the DO cancer slim.", - "On the GlyGen interface, in Mutations section the mutations that have passed highest number of filter criteria are displayed first irrespective of their amio acid positions. For eg. the mutation entry that has passed 5 filter criteria will be shown first." - ], - "score": { - "usability_domain_length": 1958 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000282/1.2.1", - "usability_domain": [ - "Natural killer (NK) cells are very diverse. They play major roles in hominid immunity and reproduction (10.3389/fimmu.2019.00177). Killer cell immunoglobulin- like receptors are key receptors for NK cells development and function in humans (https://doi.org/10.5045/kjh.2011.46.4.216). KIR's genes span 10-16kb each and recombination through the different haplotypes has made their order and copy number highly variable. Their genes encode proteins that recognize human leukocyte antigen (HLA) that initiate signaling pathways in NK cells. This leads to the release of cytokines or death of the target cell. ", - "Full haplotyping requires physical separation and amplification of maternal and paternal haplotypes via fosmids for subsequent sequencing. This approach is high cost and cannot be used for high scale studies. It is also difficult to interpret KIR haplotypes for an individual human genome considering the reads from high-throughput sequencing cannot be deciphered because the structural arrangements are unknown. This requires a more specific and careful interpretation of the KIR region than in most other regions in the human genome. The pipeline described here is used to capture, sequence, assemble and annotate diploid human KIR haplotypes. This approach is meant to efficiently capture 2-8kb fragments of DNA for better identification of haplotypes. ", - "In this study a cohort of 8 African Americans and 8 Europeans were used and the results demonstrated that every KIR gene and intergene contains constant regions that are targetable by capture probes. By targeting the constant regions the variable regions can be captured and sequenced by standard PacBio workflows. This approach also requires no prior knowledge of the individual or references, while only utilizing the standard lab workflows with access to free and open software. " - ], - "score": { - "usability_domain_length": 1846 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000269/1.0", - "usability_domain": [ - "Reliably annotated microbial genomes in public databases have been decreasing over the years and this could be due to automatic annotation of microbial genomes rather than doing a manual annotation. These poor annotations of automated microbial annotation pipelines are looking for the first/best hit and not based on the experimentally verified annotation. ", - "It\u2019s important to have a more reliable and accurate genome annotation as it\u2019s one of the key things to understand the complete genetic material of many organisms.", - "Multi-threaded Enhanced prokaryotic Genome Annotator (MEGAnnotator) is a bioinformatics pipeline that generates annotated GenBank files for microbial genomes using DNA shotgun sequencing reads.", - "It is beneficial for microbiologist researchers interested in genome analyses of bacteria and microbiome of organisms as it provides reduced redundant annotations and is efficient in pre-arranging assembly and annotation work to process NGS genome sequence data.", - "It is also a user-friendly pipeline that allows the annotation of complex genome analyses for investigators that cannot prepare their own bioinformatics pipeline.", - "Furthermore, it helps solve the problem of inaccurately automated annotation of microbial genomes, improves the quality of the microbial genome annotation, and allows the user to annotate partially pre-assembled genomes, as well as assemble metagenomic data sets.", - "As shown by the results where the number of contigs were reduced for each organism's output , MEGAnnotator is able to accurately depict and annotate the genome assembly that it curates. Thus allowing a more accurate and efficient (by lowering the time) way of genome annotation compared to other publicly accessed bioinformatics pipeline with automated annotation." - ], - "score": { - "usability_domain_length": 1764 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000295/0.1", - "usability_domain": [ - "This BCO represents an ALS gene scanning pipeline, through the adaption of a fast and efficient bioinformatics pipeline called DNAscan. DNAscan allows for the analysis of DNA next generation sequencing data, using very little memory usage and computational effort. ", - "This pipeline allows for the identification of genetic factors characterized by ALS, utilizing the identification of variants in more than 25 genes with a very strong correlation to ALS, in addition to variants in more than 120 genes with a weaker correlation to ALS. As a result of the rapidly increasing availability of next-generation sequencing data, patients and health care professionals are obtaining genomic information without the ability to evaluate and deduce their findings. In addition to this phenomenon, the relevance of variants in ALS genes is not easily evident. With the implementation of this pipeline, patients and providers are able to obtain an easily accessible tool that can provide an automatic and comprehensive annotated report on a list of ALS genes from whole-genome and whole-exome sequencing data on a typical computer in less than 5 hours.", - "The ALS gene scanning pipeline performs alignment, variant calling, structural variant calling, repeat expansion calling, and variant annotation using Annovar.", - "ALSgeneScanner restricts the analysis to a subset of genes associated with ALS, prioritizing variants according to scientific evidence of the gene association and the effect prediction of the variant.", - "The required input genome or exome sequencing data should be taken from ALS patients, which will then be referenced to hg19 or grch37." - ], - "score": { - "usability_domain_length": 1630 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000295/0.10", - "usability_domain": [ - "This BCO represents an ALS gene scanning pipeline, through the adaption of a fast and efficient bioinformatics pipeline called DNAscan. DNAscan allows for the analysis of DNA next generation sequencing data, using very little memory usage and computational effort. ", - "This pipeline allows for the identification of genetic factors characterized by ALS, utilizing the identification of variants in more than 25 genes with a very strong correlation to ALS, in addition to variants in more than 120 genes with a weaker correlation to ALS. As a result of the rapidly increasing availability of next-generation sequencing data, patients and health care professionals are obtaining genomic information without the ability to evaluate and deduce their findings. In addition to this phenomenon, the relevance of variants in ALS genes is not easily evident. With the implementation of this pipeline, patients and providers are able to obtain an easily accessible tool that can provide an automatic and comprehensive annotated report on a list of ALS genes from whole-genome and whole-exome sequencing data on a typical computer in less than 5 hours.", - "The ALS gene scanning pipeline performs alignment, variant calling, structural variant calling, repeat expansion calling, and variant annotation using Annovar.", - "ALSgeneScanner restricts the analysis to a subset of genes associated with ALS, prioritizing variants according to scientific evidence of the gene association and the effect prediction of the variant.", - "The required input genome or exome sequencing data should be taken from ALS patients, which will then be referenced to hg19 or grch37." - ], - "score": { - "usability_domain_length": 1630 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_022574/1.0", - "usability_domain": [ - "The purpose of this study is to identify the integration sites of retroviruses including a common one which is the HIV virus. The pipeline describes the steps after the DNA has been clones and sequences. The sequencing left the sample with artifacts that are then reduced using a bioinformatics pipeline. The first step is to differentiate reads by using their sample index as well as identification of both 5\u2019and 3\u2019 ends. Next, reference genomes are used (human sequenced genomes that were infected with the HIV virus) for genome alignment using a BLAST-like tool. The BLAT alignment allows for similar sequences to be classified at the chromosomal level based on a score system. The pipeline then removes sequences that do not match or that are not on the same chromosomes as artifacts. The alignment requires reads to be on the same chromosome, on opposite strands, with a size of not more than 1kb. The host breakpoint and the LTR DNA junctions only is considered a valid integration sites when those main criteria are met. In fact, this tool allows for the results to be less noisy as artifacts are removed thoroughly for better genomic outcomes. The pipeline predicts priming sites from the sequenced genomes and provides raw data sequence for the discovered integrations sites. The pipeline analysis was established and published on BMC genomics. The citation is as follows Wells, D.W., Guo, S., Shao, W. et al. An analytical pipeline for identifying and mapping the integration sites of HIV and other retroviruses. BMC Genomics 21, 216 (2020). https://doi.org/10.1186/s12864-020-6647-4" - ], - "score": { - "usability_domain_length": 1595 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.4", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool. default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. \nThe primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for all file types with the designation of ngsQC*, siteQC*, assembleyQC* and biosampleMeta* is following a consistent representation of the data properties.\n*files are of HIVE or NCBI origin. " - ], - "score": { - "usability_domain_length": 1548 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.5", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool. default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. \nThe primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for all file types with the designation of ngsQC*, siteQC*, assembleyQC* and biosampleMeta* is following a consistent representation of the data properties.\n*files are of HIVE or NCBI origin. " - ], - "score": { - "usability_domain_length": 1548 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.4", - "usability_domain": [ - "List of controlled vocabulary terms for data.ARGOSdb Annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool; default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. Currently all terms are sourced from the DRM_all_orgs.tsv (ARGOS_000055). The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1532 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.5", - "usability_domain": [ - "List of controlled vocabulary terms for data.ARGOSdb Annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_object - The dataset this property is used; requirement - Indicates if the property is REQUIRED to hava a valid data row; id - For JSON schema conversion; title - Human readable name for property. Default is the same as property; data_type - Property type as defined by JSON types; constraint - Set per a term to indicate an acceptable value range. Can be used as a QC tool; default - Default value for property; examples - Example for the property; pattern - The regular expression evaluation for this property. Can be used as a QC tool. Currently all terms are sourced from the DRM_all_orgs.tsv (ARGOS_000055). The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1532 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.0", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1469 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.1", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1469 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.2", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1469 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000016/1.3", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1469 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000271/1.0", - "usability_domain": [ - "This BCO represents a pipeline called SEARCHIN, which has the ability to identify ligand-mediated interactions between different cellular compartments. The output of SEARCHIN consists of an extensively reduced list of potential interactions between ligands produced by one compartment and receptors produced in another compartment, formulating a ranked list. From this list, candidates for experimental validation can be effectively selected.", - "The pipeline was assessed through a model of amyotrophic lateral sclerosis (ALS), in which astrocytes expressing mutant superoxide dismutase-1 destroy wild-type motor neurons through an undetermined mechanism. The pipeline inferred that the top predicted ligand-receptor pairs as an interaction between astrocyte-released amyloid precursor protein and death receptor-6 on motor neurons. This inferred deleterious role of amyloid precursor protein and death receptor-6 was later confirmed in vitro in models of ALS.", - "This pipeline consists of well-validated algorithms like CINDy, VIPER, and PrePPI, which were not originally designed to study cell-cell communication processes, to test this methodology by generating testable and reasonable hypotheses that can be experimentally validated. Each individual step maintains their own default parameters and thresholds which are based on extensive benchmarks performed in previous original manuscripts in which the algorithms were originally produced." - ], - "score": { - "usability_domain_length": 1437 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.3", - "usability_domain": [ - "List of non-core controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property. The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adheres to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1433 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.0", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1422 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.1", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1422 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000017/1.2", - "usability_domain": [ - "List of controlled vocabulary terms for ARGOSdb annotation and metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Data Object Type - The dataset this property is used in. Optional/Required - indicates if the property is REQUIRED to hava a valid data row. $id - For JSON schema conversion. Title - Human readable name for property. Default is the same as property. Type - property type as defined by JSON types. default - a default value for property. examples - and example for the property. pattern - the regular expression evaluation for this property. description - A definition and additional information about the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org is following a consistent representation, and adhears to a controlled vocabulary of data properties." - ], - "score": { - "usability_domain_length": 1422 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000300/1.0", - "usability_domain": [ - "Bovine tuberculosis (bTB) is an epidemic disease in cattle as a result of infection with Mycobacterium bovis. While 99.95% similar to the human variant, M. tuberculosis, this disease primarily infects bovids like cattle and bison. The typical response in a positive infection is to slaughter the infected cattle, and thus the economic weight of bTB is extremely high.", - "Aerosolized mycobacterium interact with bovine alveolar macrophages (bAM) upon inhalation and either begin replicating or are cleared by the adaptive immune response. The bacteria use several mechanisms for immune evasion: inactivation of ROS and RNI, molecular mimicry, reduction of interferon signaling, etc. ", - "This study seeks to understand the genetic predisposition that certain cattle have for reducing the likelihood of a bTB establishment by using data from genome-wide association studies (GWAS) to compare the bAM genomic similarities and differences in cattle. ", - "Differentially expressed genes were analyzed using DESeq2 and this genomic data was run through the R suite DGCA and Cytoscape to generate the correlation networks at 24 hpi and 48 hpi. Of the three different pipelines used to analyze these infected bAM for gene-gene alterations, only the correlation network approach provided data on all three types of cattle tested in a post-infection context." - ], - "score": { - "usability_domain_length": 1334 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.0", - "usability_domain": [ - "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1328 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.1", - "usability_domain": [ - "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1328 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.2", - "usability_domain": [ - "Definitions for the controlled vocabulary used by ARGOSdb QC and biosample metadata table data properties.", - "This sheet was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb.", - "The final result here is a list of properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property.", - "The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org for four file types ngsQC.tsv, siteQC.tsv, assembleyQC.tsv and biosampleMeta.tsv is following a consistent representation of the data properties." - ], - "score": { - "usability_domain_length": 1328 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_016880/1.0", - "usability_domain": [ - "The purpose of this study is to identify the integration sites of retroviruses including a common one which is the HIV virus. The pipeline describes the steps after the DNA has been clones and sequences. The sequencing left the sample with artifacts that are then reduced using a bioinformatics pipeline. The first step is to differentiate reads by using their sample index as well as identification of both 5\u2019and 3\u2019 ends. Next, reference genomes are used (human sequenced genomes that were infected with the HIV virus) for genome alignment using a BLAST-like tool. The BLAT alignment allows for similar sequences to be classified at the chromosomal level based on a score system. The pipeline then removes sequences that do not match or that are not on the same chromosomes as artifacts. The alignment requires reads to be on the same chromosome, on opposite strands, with a size of not more than 1kb. The host breakpoint and the LTR DNA junctions only is considered a valid integration sites when those main criteria are met. In fact, this tool allows for the results to be less noisy as artifacts are removed thoroughly for better genomic outcomes. The pipeline predicts priming sites from the sequenced genomes and provides raw data sequence for the discovered integrations sites. " - ], - "score": { - "usability_domain_length": 1287 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.5", - "usability_domain": [ - "Core and Annotation definitions and properties of the controlled vocabulary (data dictionary) used by data.ARGOSdb data sets.", - "List of controlled vocabulary terms for data.ARGOSdb.org datasets and data properties. This data dictionary was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on data.ARGOSdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in data.ARGOSdb. A README and Release Notes are available per each version of the Data Dictionary. The primary use case for the data dictionary is to ensure all data submitted to data.argosdb.org is following a consistent representation of the data properties. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_files - A `|` separated list of dataset names where this property is utilized; recommended - The person or resource that suggested using the property; description - A definition and additional information about the property; source_or_type_def - The data source for obtaining the property." - ], - "score": { - "usability_domain_length": 1275 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.6", - "usability_domain": [ - "Core and Annotation definitions and properties of the controlled vocabulary (data dictionary) used by data.ARGOSdb data sets.", - "List of controlled vocabulary terms for data.ARGOSdb.org datasets and data properties. This data dictionary was created to aid in the integration of ARGOS data from many disparate sources. Each of the column headers in each of the respective data sheets displayed on data.ARGOSdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in data.ARGOSdb. A README and Release Notes are available per each version of the Data Dictionary. The primary use case for the data dictionary is to ensure all data submitted to data.argosdb.org is following a consistent representation of the data properties. The following are the column headers and their meaning: property - Consensus name for data property described in row; data_files - A `|` separated list of dataset names where this property is utilized; recommended - The person or resource that suggested using the property; description - A definition and additional information about the property; source_or_type_def - The data source for obtaining the property." - ], - "score": { - "usability_domain_length": 1275 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000015/1.3", - "usability_domain": [ - "Core and Non-core definitions and properties of the controlled vocabulary (data dictionary) used by ARGOSdb data sets.", - "Each of the column headers in each of the respective data sheets displayed on www.data.argosdb.org was recommended by project members and collaborators from the FDA. The resulting list was manually curated to combine similar terms, and provide a consistent representation across all datasets in ARGOSdb. The final result here is a list of definitions and properties and descriptive information about the property. The following are the column headers and their meaning: Property - consensus name for data property described in row. Name in Files - Alternate names for data property in existing datasets. Data Files - A `|` separated list of dataset names where this property is utilized. recommended - The person or resource that suggested using the property. Description - A definition and additional information about the property. source/type def - The data source for obtaining the property. The primary use case for this property definition list is to ensure all data submitted to data.argosdb.org (Core or Non-Core) are following a consistent representation of the data properties so that they can be easily integrated, as well as QC'd for outliers. " - ], - "score": { - "usability_domain_length": 1274 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000084/v-2.0.2", - "usability_domain": [ - "The Human Protein Diseases dataset contains list of human [taxid:9606] diseases from OMIM, MONDO and Genomics England databases mapped to the UniProtKB protein canonical accessions and Disease Ontology (DO) identifiers..", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship.", - "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest.", - "The Disease Ontology has been developed as a standardized ontology for human disease with the purpose of providing the biomedical community with consistent, reusable and sustainable descriptions of human disease terms, phenotype characteristics and related medical vocabulary disease concepts through collaborative efforts of biomedical researchers, coordinated by the University of Maryland School of Medicine, Institute for Genome Sciences.", - "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, DO, and EMBL-EBI-UniProt and GlyGen" - ], - "score": { - "usability_domain_length": 1191 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000084/v-2.1.1", - "usability_domain": [ - "The Human Protein Diseases dataset contains list of human [taxid:9606] diseases from OMIM, MONDO and Genomics England databases mapped to the UniProtKB protein canonical accessions and Disease Ontology (DO) identifiers..", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship.", - "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest.", - "The Disease Ontology has been developed as a standardized ontology for human disease with the purpose of providing the biomedical community with consistent, reusable and sustainable descriptions of human disease terms, phenotype characteristics and related medical vocabulary disease concepts through collaborative efforts of biomedical researchers, coordinated by the University of Maryland School of Medicine, Institute for Genome Sciences.", - "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, DO, and EMBL-EBI-UniProt and GlyGen" - ], - "score": { - "usability_domain_length": 1191 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000268/v1.0.0", - "usability_domain": [ - "The LAMPS analysis pipeline facilitates the analysis of Ligation-Mediated Amplification (LMA) sequencing data and provides a thorough assessment of a library\u2019s reads for a variety of experimental parameters", - "LAMPS is the first pipeline to provide quality control reporting of LMA primers, allowing for easy identification of problematic primer pairs during design and data analysis of LMA experiments", - "LAMPS is a three-step pipeline which maps reads from a BAM or FASTQ file to expected alignments including a barcode, forward primer, and reverse primer, provides quality control reports for reads that are either too short or do not map to the database, and normalizes the read counts to reads per million (RPM) to allow for easy integration with downstream analysis pipelines.", - "Because LAMPS can be used with 5C or 2C-ChIP protocols and the use of either BLAST or Bowtie 2, output files listed in this BCO are limited to outputs that would be obtained using any method. Because of this, not all outputs are included. Outputs that are included are named as if 2C-ChIP data was used.", - "Either BLAST or Bowtie 2 can be used for LAMPS. SAMtools is only required if sequencing files are in BAM format." - ], - "score": { - "usability_domain_length": 1189 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000099/1.0.36", - "usability_domain": [ - "A0403 is the cobas EGFR Mutation Test v2 is a real-time PCR test for the qualitative detection of defined mutations of the epidermal growth factor receptor (EGFR) gene in non-small cell lung cancer (NSCLC) patients. Defined EGFR mutations are detected using DNA isolated from formalin-fixed paraffin-embedded tumor tissue (FFPET) or circulating-free tumor DNA (cfDNA) from plasma derived from EDTA anti-coagulated peripheral whole blood.The test is indicated as a companion diagnostic to aid in selecting NSCLC patients for treatment with the targeted therapies listed ... Drug FFPET PlasmaTARCEVA (erlotinib) Exon 19 deletions and L858R Exon 19 deletions and L858R TAGRISSO (osimertinib) 790M T790M ... Table 2 below that are also detected by the cobas EGFR Mutation Test v2: Table 2Drug FFPET PlasmaTARCEVA (erlotinib) G719X, exon 20 insertions, T790M, S768I and L861Q G719X, exon 20 insertions, T790M, S768I and L861QTAGRISSO (osimertinib) G719X, exon 19 deletions, L858R, exon 20 insertions, S768I, and L861Q G719X, exon 19 deletions, L858R, exon 20 insertions, S768I, and L861Q [FTCID:P150045]" - ], - "score": { - "usability_domain_length": 1102 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_019942/1.0", - "usability_domain": [ - "In Case Study 2, the objective of CAncer bioMarker Prediction Pipeline (CAMPP) was to analyze N-glycan abundances from tumor (TIF) and normal interstitial fluids (NIF) and serum via LC-MS/MS. Data was collected from 90 female breast cancer patients and the total number N-glycan groups identified were 165. This study demonstrates the relationship of N-glycosylation of breast cancer both in vitro and in vivo. Variable selection with CAMPP is implemented through linear models from mass spectrometry data for differential abundance analysis (DAA) and LASSO/Elastic-Net Regression. The result of this pipeline are tabular files and graphical representations of the dataset being studied. The biomolecules of interest from a large dataset can be found by applying different statistical tests and machine learning approaches. CAMPP creates a standardized way to screening for cancer biomarkers and other biomolecules of interest before their implementation in potential experiments. This pipeline was created based on the work of Terkelsen et al. doi: 10.1371/journal.pcbi.1007665" - ], - "score": { - "usability_domain_length": 1078 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_063167/1.0", - "usability_domain": [ - "Pipeline for interrogating Telomerase RNA Component (TERC) - chromatin interaction. Three libraries were assembled: HiChIRP TERC sample from human female B lymphoblastoid cell line (GM12878) as well as RNase (GM12878) and HeLa cell line TERC-knockout samples as negative controls. Libraries were generated by performing HiChIP/HiChIRP (https://doi.org/10.1038/s41592-019-0407-x) and Illumina paired-end sequencing on the samples. Sequencing data is processed according to standard protocol, including alignment, duplicate removal and filtering with HiC-Pro, before generating interaction matrices and calling loops with Juicer HiCCUPS algorithm. The pipeline will produce a list of high-confidence loop calls. In the original experiment the examination of the loops indicated a high number of telomere-telomere interaction (92%); additionally, the results showed that TERC was also associated with loops beyond telomeric regions, specifically at enhancer-promoter regions of several oncogenes, implying possible role of TERC beyond telomeres." - ], - "score": { - "usability_domain_length": 1043 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_019748/1.1", - "usability_domain": [ - "Shotgun Metagenomic Sequencing, generating paired-end reads, was performed on stool samples from individuals from different regions of the Democratic Republic of Congo (DRC). Some cohorts serve as geographical and disease controls, while disease cohorts have individuals with konzo. Konzo is a distinct upper motor neuron disease prevalent in sub-Saharan Africa, and consumption of bitter cassava, high in cyanogenic glycosides, is implicated in the disease. Here, Kraken2 and Bracken (developed by JHU) are used to assess the gut microbiome of individuals from the DRC, and downstream analysis of the differences in bacterial composition can be determine using the final output from this pipeline. This pipeline removes host reads (using BMtagger) from the paired-end reads fastq files generated from four lanes per sample, removes Illumina adapters (can be modified for the specific adapter sequence) (using skewer), and assigns the reads to different taxonomic classification (with standard database) using Kraken2 and Bracken." - ], - "score": { - "usability_domain_length": 1030 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.5", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT) for the original FDA BioProject and the National Collection of Pathogenic Viruses-sc-4123.", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted data (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221, National Collection of Pathogenic Viruses). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 1022 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000038/1.0", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT) for the original FDA BioProject and the National Collection of Pathogenic Viruses-sc-4123.", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted data (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221, National Collection of Pathogenic Viruses). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 1022 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000142/v-2.0.2", - "usability_domain": [ - "The dataset provides information on O-glycosylation sites on Human proteins. The data is submitted by Dr.Christina Woo from Department of Chemistry and Chemical Biology, Harvard University. The GlyTouCan accession (G70994MS)is annotated to the glycan composition based on author's recommendation. Author's note: \"For the glycan assignment, it would be most technically correct to use a HexNAc molecule, although by and large, these are O-GlcNAc modifications, we cannot be 100% certain for every single one\". The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). The source publication:Woo CM, Lund PJ, Huang AC, Davis MM, Bertozzi CR, Pitteri SJ. Mapping and Quantification of Over 2000 O-linked Glycopeptides in Activated Human T Cells with Isotope-Targeted Glycoproteomics (Isotag). Mol Cell Proteomics. 2018;17(4):764-75. doi: 10.1074/mcp.RA117.000261. PubMed PMID: 29351928; PubMed Central PMCID: PMCPMC5880114)." - ], - "score": { - "usability_domain_length": 992 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000142/v-2.1.1", - "usability_domain": [ - "The dataset provides information on O-glycosylation sites on Human proteins. The data is submitted by Dr.Christina Woo from Department of Chemistry and Chemical Biology, Harvard University. The GlyTouCan accession (G70994MS)is annotated to the glycan composition based on author's recommendation. Author's note: \"For the glycan assignment, it would be most technically correct to use a HexNAc molecule, although by and large, these are O-GlcNAc modifications, we cannot be 100% certain for every single one\". The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). The source publication:Woo CM, Lund PJ, Huang AC, Davis MM, Bertozzi CR, Pitteri SJ. Mapping and Quantification of Over 2000 O-linked Glycopeptides in Activated Human T Cells with Isotope-Targeted Glycoproteomics (Isotag). Mol Cell Proteomics. 2018;17(4):764-75. doi: 10.1074/mcp.RA117.000261. PubMed PMID: 29351928; PubMed Central PMCID: PMCPMC5880114)." - ], - "score": { - "usability_domain_length": 992 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000100/1.0.36", - "usability_domain": [ - "A0404 is a biomarler panel device that is indicated for: the THERASCREEN EGFR RGQ PCR KIT is a real-time pcr test for the qualitative detection of exon 19 deletions and exon 21 (L858R) substitution mutations of the epidermal growth factor receptor (EGFR) gene in DNA derived from formalin-fixed paraffin-embedded (FFPE) non-small cell lung cancer (NSCLC) tumor tissue. the test is intended to be used to select patients with NSCLC for whom gilotrjf (afatinib), an EGFR TYROSINE KINASE INHIBITOR (TKI), is indicated. safety and efficacy of gilotrif (afatinib) have not been established in patients whose tumors have L861Q, G719X, 87681, exon 20 insertions, and T790M mutations, which are also detected by the THERASCREEN\u00a0 EGFR RGQ PCR KIT. specimens are processed using the QIAAMP\u00a0 DSP DNA FFPE TISSUE KIT for manual sample preparation and the rotor-gene\u00a0 Q MDX instrument for automated amplification and detection. [FTCID:P120022]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 987 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000404/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref Pharos dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pharos UniProtKB accessions. The data is from Pharos API based on TCRD version 5.4.0. If you use this dataset please provide proper attribution to Pharos and GlyGen", - "Pharos is the user interface to the Knowledge Management Center (KMC) for the Illuminating the Druggable Genome (IDG) program funded by the National Institutes of Health.", - "The goal of KMC is to develop a comprehensive, integrated knowledge-base for the Druggable Genome (DG) to illuminate the uncharacterized and/or poorly annotated portion of the DG, focusing on three of the most commonly drug-targeted protein families: G-protein-coupled receptors (GPCRs), ion channels (ICs) and kinases", - "Nguyen, D.-T., Mathias, S. et al, Pharos: Collating Protein Information to Shed Light on the Druggable Genome , Nucl. Acids Res.i>, 2017, 45(D1), D995-D1002. DOI: 10.1093/nar/gkw1072", - "https://pharos.nih.gov/about" - ], - "score": { - "usability_domain_length": 981 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000404/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref Pharos dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pharos UniProtKB accessions. The data is from Pharos API based on TCRD version 5.4.0. If you use this dataset please provide proper attribution to Pharos and GlyGen", - "Pharos is the user interface to the Knowledge Management Center (KMC) for the Illuminating the Druggable Genome (IDG) program funded by the National Institutes of Health.", - "The goal of KMC is to develop a comprehensive, integrated knowledge-base for the Druggable Genome (DG) to illuminate the uncharacterized and/or poorly annotated portion of the DG, focusing on three of the most commonly drug-targeted protein families: G-protein-coupled receptors (GPCRs), ion channels (ICs) and kinases", - "Nguyen, D.-T., Mathias, S. et al, Pharos: Collating Protein Information to Shed Light on the Druggable Genome , Nucl. Acids Res.i>, 2017, 45(D1), D995-D1002. DOI: 10.1093/nar/gkw1072", - "https://pharos.nih.gov/about" - ], - "score": { - "usability_domain_length": 981 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000043/1.0.36", - "usability_domain": [ - " List of human [taxid:9606] genes with healthy RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at (https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx) Only the subset of RNA-Seq data are used to generate the expression profiles for healthy individuals for human used by OncoMX. From this data, a custom format of healthy data was generated for human containing the following information: Ensembl gene ID and UniProtKB accessions, Uberon anatomical entity IDs and names, Uberon developmental stage IDs and names, qualitative (high, medium, low, absent) reported expression levels for a queried gene with respect to all genes in a given tissue, similarly qualitative reported expression levels for a queried gene with respect to that same gene's expression across all tissues, the quality associated with the call, and a quantitative expression score based on ranks." - ], - "score": { - "usability_domain_length": 980 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000010/1.0", - "usability_domain": [ - "Reference-guided genome assemblies generated by the FDA-Argos Team using HIVE platform tools.", - "Paired-end fastq files are aligned to a NCBI GenBank sourced reference genome with HIVE-Hexagon, and a consensus fasta is obtained by HIVE-Heptagon. QC'ing for frameshift deletions and/or stop codons are detected under Phase Mutation tab in HIVE-Heptagon. The resulting assembly metrics are then analyzed using the Quast quality assessment tool. The FASTA header definition lines are generated by (example header included): (Example Header) >FA02SRR17261988|Marburg OS=Marburg marburgvirus OX=448086 OV=IRF0328_MARV_CI67 SV=1 RG=NC_001608.3; (Definitions): FA02SRR17261988 = Unique ID, FA=fasta, 02=#, SRR17261988 = sra_run_id; Marburg = UniProtKB Entry name; OS = Organism Name = Marburg marburgvirus; OX = Organism Identifier/Taxonomy = 448086; OV = Organism Variant Name = IRF0328_MARV_CI67; SV = Sequence Version = 1; RG = Reference Guided = NC_001608.3" - ], - "score": { - "usability_domain_length": 952 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000010/1.1", - "usability_domain": [ - "Reference-guided genome assemblies generated by the FDA-Argos Team using HIVE platform tools.", - "Paired-end fastq files are aligned to a NCBI GenBank sourced reference genome with HIVE-Hexagon, and a consensus fasta is obtained by HIVE-Heptagon. QC'ing for frameshift deletions and/or stop codons are detected under Phase Mutation tab in HIVE-Heptagon. The resulting assembly metrics are then analyzed using the Quast quality assessment tool. The FASTA header definition lines are generated by (example header included): (Example Header) >FA02SRR17261988|Marburg OS=Marburg marburgvirus OX=448086 OV=IRF0328_MARV_CI67 SV=1 RG=NC_001608.3; (Definitions): FA02SRR17261988 = Unique ID, FA=fasta, 02=#, SRR17261988 = sra_run_id; Marburg = UniProtKB Entry name; OS = Organism Name = Marburg marburgvirus; OX = Organism Identifier/Taxonomy = 448086; OV = Organism Variant Name = IRF0328_MARV_CI67; SV = Sequence Version = 1; RG = Reference Guided = NC_001608.3" - ], - "score": { - "usability_domain_length": 952 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000624/v-2.0.2", - "usability_domain": [ - "This mouse glycogenes dataset is retrieved from stem cell analysis which was done with qRT-PCR analysis [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened mouse[taxonomy:10090] protein names mapped to NCBI gene and MGI IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy. " - ], - "score": { - "usability_domain_length": 949 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000624/v-2.1.1", - "usability_domain": [ - "This mouse glycogenes dataset is retrieved from stem cell analysis which was done with qRT-PCR analysis [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened mouse[taxonomy:10090] protein names mapped to NCBI gene and MGI IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy. " - ], - "score": { - "usability_domain_length": 949 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_014961/1.0", - "usability_domain": [ - "Differential expression analysis based on RNA-seq data is a common method of transcriptomic analysis. Reads generated from an RNA-seq experiment are mapped to a reference genome and the resulting quantification of transcripts can be compared between case and control; potentially highlighting transcriptional (and therefore physiological) differences between samples. This paper is concerned with correctly identifying differentially expressed genes from RNA-seq data. The paper evaluates six read mapping methods and nine methods of differential expression analysis. Specifically, this pipeline uses real, RNA-Seq data from the Microarray Quality Control (MAQC) project and assesses the overlap of mapping results and differential expression results between tools. The pipeline that will become a BCO will take the RNA-seq data and use both python and R scripts to run analyses and generate a report of overlap results." - ], - "score": { - "usability_domain_length": 920 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_054419/1.0", - "usability_domain": [ - "Shotgun Metagenomic Sequencing, generating paired-end reads, was performed on stool samples from individuals from different regions of the Democratic Republic of Congo. Some cohorts serve as geographical and disease controls, while disease cohorts have individuals with konzo. Konzo is a distinct upper motor neuron disease, and consumption of bitter cassava, high in cyanogenic glucosides, is implicated in the disease. Here, Kraken2 and Bracken (developed by JHU) are used to assess the gut microbiome of individuals from the DRC, and downstream analysis of the differences in bacterial composition can be determine using the final output from this pipeline. This pipeline removes host reads (using BMtagger), removes Illumina adapters (can be modified for the specific adapters used) (using skewer), and assigns the reads to different taxonomic classification (with standard database) using Kraken2 and Bracken. " - ], - "score": { - "usability_domain_length": 915 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000623/v-2.0.2", - "usability_domain": [ - "This Human glycogenes dataset is retrieved a publication from UGA [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened human [taxonomy:9606] protein names mapped to NCBI gene and HGNC IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy." - ], - "score": { - "usability_domain_length": 911 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000623/v-2.1.1", - "usability_domain": [ - "This Human glycogenes dataset is retrieved a publication from UGA [PMID:18411279]. It contains a list of UniProtKB accessions full and shortened human [taxonomy:9606] protein names mapped to NCBI gene and HGNC IDs. The GT, GH, CBM and CE groups are defined by CAZy families. Following those groups are Lectins (following Kurt Drickamer\u2019s groupings), then several other glycan-related genes like GL (Glycolipid-related), GR (GAG-related), etc. If you use this dataset, please provide proper attribution to RefSeq, EMBL-EBI-UniProtKB and GlyGen. Cite:PMID:26553804, PMID:33237286 and PMID:31616925. The dataset can be used for obtaining UniProtKB recommended names (full and short) and HGNC gene names for UniProtKB canonical proteins. If you use this dataset, please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. This dataset can be used for obtaining proteins grouped as they are defined by CAZy." - ], - "score": { - "usability_domain_length": 911 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.0", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the FDA ARGOS BioProject (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 894 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.1", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the FDA ARGOS BioProject (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 894 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.2", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the selected BioProjects. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 880 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.4", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA Taxonomy Analysis Tool (STAT). ", - "NCBI's SRA has automatic quality assurance and quality control analyses it runs on all submitted date (see https://www.ncbi.nlm.nih.gov/sra/docs/sra-taxonomy-analysis-tool/). Any SRA sample may have multiple runs, all with their own results. For this data set we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all run IDs from the selected BioProjects. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the taxonomic analysis data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 880 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000102/1.0.36", - "usability_domain": [ - "A0406 is the therascreen KRAS RGQ PCR Kit is a real-time qualitative PCR assay used on the Rotor-Gene Q MDx instrument for the detection of seven somatic mutations in the human KRAS oncogene, using DNA extracted from formalin-fixed paraffin-embedded (FFPE), colorectal cancer (CRC) tissue. The therascreen KRAS RGQ PCR Kit is intended to aid in the identification of CRC patients for treatment with Erbitux (cetuximab) and Vectibix (panitumumab) based on a KRAS no mutation detected test result. The QIAGEN therascreen KRAS RGQ PCR Kit contains reagents for eight separate reactions; seven mutation specific reactions to amplify and detect mutations in codons 12 and 13 in exon 2 of the KRAS oncogene, and one Control Reaction that amplifies and detects a region of exon 4 in the KRAS oncogene. [FTCID:P110027]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 867 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_069535/1.0", - "usability_domain": [ - "A pipeline for curating the FDA's RVDB (v. 18) database of viral species. Pipeline was developed by Merck and Co., and is based on Lu and Salzberg (10.1371/journal.pcbi.1006277). Pipeline begins by i) converting viral genomes into pseudo reads, ii) alignment of pseudo reads to target database, iii) low-complexity masking, iv) masking viral genomes based on pseudo read classification, and v) addition of HIVE-specific sequence headers for integration into HIVE software. Pipeline includes a wrapper. See GitHub repository (linked in Extension Domain) for more information. The wrapper script reference (https://github.com/Merck/curation-open-source/blob/master/run_curation.sh) bundles the entire pipeline and enables execution in an HPC environment up to 200GB memory (which is required for Kraken2). This implementation was submitted using the qsub utility." - ], - "score": { - "usability_domain_length": 861 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_069535/2.0", - "usability_domain": [ - "A pipeline for curating the FDA's RVDB (v. 18) database of viral species. Pipeline was developed by Merck and Co., and is based on Lu and Salzberg (10.1371/journal.pcbi.1006277). Pipeline begins by i) converting viral genomes into pseudo reads, ii) alignment of pseudo reads to target database, iii) low-complexity masking, iv) masking viral genomes based on pseudo read classification, and v) addition of HIVE-specific sequence headers for integration into HIVE software. Pipeline includes a wrapper. See GitHub repository (linked in Extension Domain) for more information. The wrapper script reference (https://github.com/Merck/curation-open-source/blob/master/run_curation.sh) bundles the entire pipeline and enables execution in an HPC environment up to 200GB memory (which is required for Kraken2). This implementation was submitted using the qsub utility." - ], - "score": { - "usability_domain_length": 861 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000085/1.0.36", - "usability_domain": [ - "A list of human (taxid:9606) biomarkers, for COVID-19 (DOID:0080600), including overlap with diabetes mellitus (DOID:9351), and 15 high-level categories of cancer (DOID:162), manually curated from scientific publications recorded in PubMed (https://pubmed.ncbi.nlm.nih.gov/) and also retrieved from EDRN (Early Detection Research Network; https://edrn.nci.nih.gov/), FDA (U.S. Food and Drug Administration; https://www.fda.gov/). EDRN is an NCI collaboration dedicated to discovery of (early) cancer biomarkers. Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen, biomarker, and disease types; the biomarker entity and its measurement modality (e.g., increased expression); and cross reference (programmatic linkage or resource ID) to established data repositories, models, and codes." - ], - "score": { - "usability_domain_length": 852 - } - }, - { - "object_id": "https://biocomputeobject.org/DEMO_000001/0.50.0", - "usability_domain": [ - "The workflow starts with selecting ACE2 as the search term. More information about the gene was then obtained with the MyGene.info API [1,2]. Next, the GlyGen database [3] was searched to identify a relevant set of proteins that originate from ACE2. The glycosylation data was extracted from the GlyGen protein response and prepared for presentation in the view metanode.\n\n1. Xin, J. et al. High-performance web services for querying gene and variant annotation. Genome Biology vol. 17 (2016). doi:10.1186/s13059-016-0953-9\n2. Wu, C., MacLeod, I. & Su, A. I. BioGPS and MyGene.info: organizing online, gene-centric information. Nucleic Acids Research vol. 41 D561\u2013D565 (2012). doi:10.1093/nar/gks1114\n3. York, W. S. et al. GlyGen: Computational and Informatics Resources for Glycoscience. Glycobiology vol. 30 72\u201373 (2019). doi:10.1093/glycob/cwz080" - ], - "score": { - "usability_domain_length": 849 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.5", - "usability_domain": [ - "QC of genome assemblies (FASTA files) using HIVE Platform. ", - "Genome assemblies are either selected from the FDA ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " - ], - "score": { - "usability_domain_length": 843 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.6", - "usability_domain": [ - "QC of genome assemblies (FASTA files) using HIVE Platform. ", - "Genome assemblies are either selected from the FDA ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " - ], - "score": { - "usability_domain_length": 843 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.7", - "usability_domain": [ - "QC of genome assemblies (FASTA files) using HIVE Platform. ", - "Genome assemblies are either selected from the FDA-ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " - ], - "score": { - "usability_domain_length": 843 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.8", - "usability_domain": [ - "QC of genome assemblies (FASTA files) using HIVE Platform. ", - "Genome assemblies are either selected from the FDA-ARGOS BioProject and QC'd, or genome assemblies (FASTA files) are generated by the FDA-ARGOS Team and then QC'd. Some genome assemblies represented in the dataset originated outside of the FDA-ARGOS BioProject. The tools used within the HIVE platform include an alignment and variant calling pipeline, tools HIVE-Hexagon and HIVE-Heptagon, as well as Quast. The QC data represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies. A secondary use case is to assist in selecting high quality assemblies out of the genome assemblies generated by the FDA-ARGOS Team. " - ], - "score": { - "usability_domain_length": 843 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_027463/1.0", - "usability_domain": [ - "Alternative splicing is when multiple isoforms are made from the same gene due to alternative selection of exons or splice sites. Although alternative splicing generally produces linear RNA, one largely unexplored form leads to a circular form of RNA, called circRNA. There is a great amount of interest in these sequences and their interactions because they may be implicated in diseases such as cancer. As such, there have been numerous tools to find and detect circRNA sequences. Present tools only detect presence/absence of the sequence however and do not account for conditions where circRNA is differentially expressed, as it often is. SeekCRIT aims to solve this issue by offering detection, quantification, and statistical analysis for circRNAs. In this work, seekCRIT's capabilities are assessed using publicly available RNA data." - ], - "score": { - "usability_domain_length": 840 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000143/v-2.0.2", - "usability_domain": [ - "The dataset provides information on N-glycosylation sites on Human proteins. The data has been processed from the supplementary material from 2 publications (1. \"Deeb, S. J., Cox, J., Schmidt-Supprian, M., & Mann, M. (2013). N-linked Glycosylation Enrichment for In-depth Cell Surface Proteomics of Diffuse Large B-cell Lymphoma Subtypes. Molecular & Cellular Proteomics, 13(1), 240-251. doi:10.1074/mcp.m113.033977\" 2. \"Boersema, P. J., Geiger, T., Winiewski, J. R., & Mann, M. (2012). Quantification of the N-glycosylated Secretome by Super-SILAC During Breast Cancer Progression and in Human Blood Samples. Molecular & Cellular Proteomics, 12(1), 158-171. doi:10.1074/mcp.m112.023614\"). The listed proteins (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). " - ], - "score": { - "usability_domain_length": 821 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000143/v-2.1.1", - "usability_domain": [ - "The dataset provides information on N-glycosylation sites on Human proteins. The data has been processed from the supplementary material from 2 publications (1. \"Deeb, S. J., Cox, J., Schmidt-Supprian, M., & Mann, M. (2013). N-linked Glycosylation Enrichment for In-depth Cell Surface Proteomics of Diffuse Large B-cell Lymphoma Subtypes. Molecular & Cellular Proteomics, 13(1), 240-251. doi:10.1074/mcp.m113.033977\" 2. \"Boersema, P. J., Geiger, T., Winiewski, J. R., & Mann, M. (2012). Quantification of the N-glycosylated Secretome by Super-SILAC During Breast Cancer Progression and in Human Blood Samples. Molecular & Cellular Proteomics, 12(1), 158-171. doi:10.1074/mcp.m112.023614\"). The listed proteins (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001). " - ], - "score": { - "usability_domain_length": 821 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000453/1.0", - "usability_domain": [ - "Identifying disease subtypes (cancer) is clinically very significant in patient survival prediction", - "It explicitly models patient survival as the objective and is predictive of new patient survival risks. DeepProg constructs a flexible ensemble of hybrid-models (a combination of deep-learning and machine learning models) and integrates their outputs following the ensemble learning paradigm.", - "DeepProg was applied on RNA-Seq, Methylation and miRNA data from 32 cancers in The Cancer Genome Atlas (TCGA),from NCBI, with a total of around 10,000 samples.", - "The results from the DeepProg method are compared to results from the Similarity Network Fusion (SNF) algorithm, used to identify cancer subtypes linked to survival by others. ", - "In all, DeepProg yields much better log-rank p values and C-indices than the SNF method. " - ], - "score": { - "usability_domain_length": 815 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000453/2.0", - "usability_domain": [ - "Identifying disease subtypes (cancer) is clinically very significant in patient survival prediction", - "It explicitly models patient survival as the objective and is predictive of new patient survival risks. DeepProg constructs a flexible ensemble of hybrid-models (a combination of deep-learning and machine learning models) and integrates their outputs following the ensemble learning paradigm.", - "DeepProg was applied on RNA-Seq, Methylation and miRNA data from 32 cancers in The Cancer Genome Atlas (TCGA),from NCBI, with a total of around 10,000 samples.", - "The results from the DeepProg method are compared to results from the Similarity Network Fusion (SNF) algorithm, used to identify cancer subtypes linked to survival by others. ", - "In all, DeepProg yields much better log-rank p values and C-indices than the SNF method. " - ], - "score": { - "usability_domain_length": 815 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.4", - "usability_domain": [ - "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. We also ran the same script on assemblies in the ARGOS DB that were not in the original BioProject and then combined both results into one list. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 814 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.5", - "usability_domain": [ - "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. We also ran the same script on assemblies in the ARGOS DB that were not in the original BioProject and then combined both results into one list. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 814 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000438/0.1", - "usability_domain": [ - "For this data set, we pulled biomarker data from OpenTargets. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 795 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000438/0.2", - "usability_domain": [ - "For this data set, we pulled biomarker data from OpenTargets. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 795 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000499/v-2.0.2", - "usability_domain": [ - "The GlyGen-PubChem Protein Cross-references dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of PubChem Protein to GlyGen cross-references. In the dataset, the UniProtKB glycoprotein accessions are mapped to the RefSeq accessions and glycosylation annotation corresponding to glycoprotein have been added. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 792 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000499/v-2.1.1", - "usability_domain": [ - "The GlyGen-PubChem Protein Cross-references dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of PubChem Protein to GlyGen cross-references. In the dataset, the UniProtKB glycoprotein accessions are mapped to the RefSeq accessions and glycosylation annotation corresponding to glycoprotein have been added. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 792 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000442/0.2", - "usability_domain": [ - "For this data set, we pulled biomarker data from MarkerDB. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 792 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000442/0.3", - "usability_domain": [ - "For this data set, we pulled biomarker data from MarkerDB. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 792 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000443/0.2", - "usability_domain": [ - "For this data set, we pulled biomarker data from ClinVar. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 791 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000443/0.3", - "usability_domain": [ - "For this data set, we pulled biomarker data from ClinVar. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 791 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000441/0.1", - "usability_domain": [ - "For this data set, we pulled biomarker data from GWAS. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 788 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000441/0.2", - "usability_domain": [ - "For this data set, we pulled biomarker data from GWAS. From the raw TSV, the data was cleaned and extracted. Data that was pulled were the biomarkers, assessed biomarker entity, specimen, and condition for the biomarker. Condition ID, specimen ID, and assessed biomarker entity ID were mapped from relevant external resources to the table. The first version of the data was mapped to the old version of the biomarker data model. The data was then reprocessed and cleaned to be mapped into the new biomarker data model that was set by the Biomarker Partnership. Temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers." - ], - "score": { - "usability_domain_length": 788 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000379/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Virus Pathogen Resource contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." - ], - "score": { - "usability_domain_length": 780 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000378/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Virus Pathogen Resource (ViPR) contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." - ], - "score": { - "usability_domain_length": 780 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000716/v-2.1.1", - "usability_domain": [ - "The Human O-Gluc Glycosylation Sites contains human (taxid:9606) O-Glucosylation sites from OGLUT2 and POGLUT3 O - glucosylate multiple EGF repeats in fibrillin - 1, -2, and LTBP1 and promote secretion of fibrillin - 1 [PMID:34411563] (experimental) and based on the consensus sequence the other sites (predicted) were detected using Prosite Prorule. The GlyTouCan ID for O-Gluc is G71142DF and the consensus sequence for O-gluc glycosylation is C3-x-N-T-x-G-S-(FY)-x-C4. The enzymes for o-glucosylation is POGLUT2 and POGLUT3. The dataset also contains Glycosylation stoichiometry: The relative amount (percentage) of a glycoform based on peptide signal intensity determined by mass spectrometry.", - "The dataset is generated in the lab of Robert S Haltiwanger by Daniel Williamson." - ], - "score": { - "usability_domain_length": 778 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000020/1.2", - "usability_domain": [ - "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", - "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." - ], - "score": { - "usability_domain_length": 769 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000020/1.3", - "usability_domain": [ - "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", - "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." - ], - "score": { - "usability_domain_length": 769 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000020/1.5", - "usability_domain": [ - "The BiosampleMeta dataset is generated by both scripts and manual input from NCBI's BioSample database, and undergoes manual curation and review. ", - "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files and genome assemblies selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The metadata represented in this dataset is derived from Mazumder, Crandall, and Pond Labs. The primary use case for this data set is to track sample metadata associated with raw sequencing files and genome assemblies selected for QC, and a secondary use case is to provide a method to quickly evaluate the metadata associated with specific biosamples." - ], - "score": { - "usability_domain_length": 769 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000028/v-2.0.2", - "usability_domain": [ - "The Gene Expression (Normal) dataset contains the normal expression information of human [taxid:9606] genes from BioXpress database. The GlyGen Interface shows the normal gene expression information for select UBERON ids only which are mapped to corresponding DOIDs and have gene expression in cancer information for those DOID. For more gene expression information of other UBERON IDs (tissues) for a given gene, please refer to BioXpress - https://hive.biochemistry.gwu.edu/bioxpress. BioXpress is a gene/miRNA expression and disease association database with expression levels mapped to genes or miRNAs. The current version of BioXpress contains only genes associated with cancer. If you are using this dataset please give proper attribution to GlyGen and BioXpress" - ], - "score": { - "usability_domain_length": 768 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000028/v-2.1.1", - "usability_domain": [ - "The Gene Expression (Normal) dataset contains the normal expression information of human [taxid:9606] genes from BioXpress database. The GlyGen Interface shows the normal gene expression information for select UBERON ids only which are mapped to corresponding DOIDs and have gene expression in cancer information for those DOID. For more gene expression information of other UBERON IDs (tissues) for a given gene, please refer to BioXpress - https://hive.biochemistry.gwu.edu/bioxpress. BioXpress is a gene/miRNA expression and disease association database with expression levels mapped to genes or miRNAs. The current version of BioXpress contains only genes associated with cancer. If you are using this dataset please give proper attribution to GlyGen and BioXpress" - ], - "score": { - "usability_domain_length": 768 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000084/1.0.36", - "usability_domain": [ - "Human [taxid:9606] cancer mutations from the Biomuta project and filtered for specific cancers presented on the OncoMX and Glygen front end web pages. The cancers included in this dataset are: Stomach Cancer (DOID:10534), Thyroid Cancer (DOID:1781), Esophageal Cancer (DOID:5041), Kidney Cancer (DOID:263), Lung Cancer (DOID:1324), Uterine Cancer (DOID:363), Urinary Bladder Cancer (DOID:11054), Prostate Cancer (DOID:10283), Colorectal Cancer (DOID:9256), Liver Cancer (DOID:3571), Cervical Cancer (DOID:4362), Breast Cancer (DOID:1612), Brain Cancer (DOID:1319), Hematologic Cancer (DOID:2531), Head and Neck Cancer (DOID:11934), Adrenal Gland Cancer (DOID:3953), Pancreatic Cancer (DOID:1793), Ovarian Cancer (DOID:2394), and Skin Cancer (DOID:4159)" - ], - "score": { - "usability_domain_length": 752 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000296/v-2.0.2", - "usability_domain": [ - " The dataset provides cross-references to ChEBI Id's for the associated glycan (GlyTouCan Accession). The mapping is produced using the direct download files(database_accession.tsv) from the ChEBI-EBI FTP (ftp://ftp.ebi.ac.uk/pub/databases/chebi/). The glycans are mapped/registered into the ChEBI database as follows: 1) GlyTouCan accession is mapped directly to ChEBI ID through PubChem CID (if the PubChem CID has a ChEBI cross-reference) 2) glycans with PubChem CID (which lack a ChEBI cross-reference) are integrated into ChEBI through applications like KNIME (https://europepmc.org/article/med/28757290) and ClassyFire (https://pubmed.ncbi.nlm.nih.gov/27867422/). 2) glycans without a PubChem CID are registered manually into the CHEBI database." - ], - "score": { - "usability_domain_length": 751 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000296/v-2.1.1", - "usability_domain": [ - " The dataset provides cross-references to ChEBI Id's for the associated glycan (GlyTouCan Accession). The mapping is produced using the direct download files(database_accession.tsv) from the ChEBI-EBI FTP (ftp://ftp.ebi.ac.uk/pub/databases/chebi/). The glycans are mapped/registered into the ChEBI database as follows: 1) GlyTouCan accession is mapped directly to ChEBI ID through PubChem CID (if the PubChem CID has a ChEBI cross-reference) 2) glycans with PubChem CID (which lack a ChEBI cross-reference) are integrated into ChEBI through applications like KNIME (https://europepmc.org/article/med/28757290) and ClassyFire (https://pubmed.ncbi.nlm.nih.gov/27867422/). 2) glycans without a PubChem CID are registered manually into the CHEBI database." - ], - "score": { - "usability_domain_length": 751 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000079/1.0.36", - "usability_domain": [ - "A list of human (taxid:9606) biomarkers, for several high-level categories of cancer (DOID:162), retrieved from EDRN (Early Detection Research Network; https://edrn.nci.nih.gov/), FDA (U.S. Food and Drug Administration; https://www.fda.gov/), or scientific publications recorded in PubMed (https://pubmed.ncbi.nlm.nih.gov/). EDRN is an NCI collaboration dedicated to discovery of (early) cancer biomarkers. Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen, biomarker, and disease types; the biomarker entity and its measurement modality (e.g., increased expression); and cross reference (programmatic linkage or resource ID) to established data repositories, models, and codes. " - ], - "score": { - "usability_domain_length": 748 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000410/v-2.0.2", - "usability_domain": [ - "The GlyGen-UniProtKB Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of UniProtKB to GlyGen crossreferences. In the dataset, the UniProtKB glycoprotein accessions have corresponding glycosylation annotation present in the datasets. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 733 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000410/v-2.1.1", - "usability_domain": [ - "The GlyGen-UniProtKB Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of UniProtKB to GlyGen crossreferences. In the dataset, the UniProtKB glycoprotein accessions have corresponding glycosylation annotation present in the datasets. The glycosylation annotation provides information about total (N,O,S,C) glycosylation sites followed by (if available) reported N-glycans and/or reported O-glycans with the number of sites. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 733 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000228/v-2.0.2", - "usability_domain": [ - "The Human Protein Signal Peptide dataset contains human [taxid:9606] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 731 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000228/v-2.1.1", - "usability_domain": [ - "The Human Protein Signal Peptide dataset contains human [taxid:9606] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 731 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000101/1.0.36", - "usability_domain": [ - "A405 is the cobas KRAS Mutation Test, for use with the cobas 4800 System, is a real-time PCR test for the detection of seven somatic mutations in codons 12 and 13 of the KRAS gene in DNA derived from formalin-fixed paraffin-embedded human colorectal cancer (CRC) tumor tissue. The test is intended to be used as an aid in the identification of CRC patients for whom treatment with Erbitux (cetuximab) or with Vectibix (panitumumab) may be indicated based on a no mutation detected result. Specimens are processed using the cobas DNA Sample Preparation Kit for manual sample preparation and the cobas z 480 analyzer for automated amplification and detection. [FTCID:P140023]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 730 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000456/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniprotKB Xref RefSeq contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 724 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000456/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniprotKB Xref RefSeq contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 724 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_095544/1.0", - "usability_domain": [ - "Pipeline for investigating the infection process by parasitic nematodes, using H.bacteriophora as a model to identify infection genes. Reads from two RNA samples of H.bacteriophora infective juveniles, one treated with the insect Manduca sexta for 9 hours and one 0 hr untreated control, were collected, trimmed, and analyzed by mapping to the reference genome. Finally, mapped reads were assigned genomic features in order to provide summarized data of the coverage for the genomic features of interest. Data from this pipeline can also be analyzed further by expression analysis to identify differentially expressed genes. This example pipeline was created based on the work of Adnal et al. doi: 10.1186/s12864-016-3468-6" - ], - "score": { - "usability_domain_length": 723 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000455/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref RefSeq contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 710 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000455/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref RefSeq contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2020-01 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 710 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000069/1.0", - "usability_domain": [ - "\"Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]\",\n \"Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\",\n \"Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\",\n \"GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20\"" - ], - "score": { - "usability_domain_length": 709 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000443/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Signal Peptide dataset contains sarscov2 [taxid:2697049] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 704 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000443/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Signal Peptide dataset contains sarscov2 [taxid:2697049] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 704 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.7", - "usability_domain": [ - "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", - "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 703 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.8", - "usability_domain": [ - "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", - "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 703 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.9", - "usability_domain": [ - "NGS quality control metrics extracted from the current FDA-ARGOS BioProject - Sequence Read Archive (SRA).", - "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the current FDA-ARGOS BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 703 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000104/1.0.36", - "usability_domain": [ - "A0408 is a panel device indicated for: the THXID BRAF kit is an in vitro diagnostic device intended for the qualitative detection of the BRAF V600E and V600K mutations in DNA samples extracted from formalin-fixed paraffin embedded (ffpe) human melanoma tissue. the THXID BRAF KIT is a real-time PCR test on the abi 7500 fast dx system and is intended to be used as an aid in selecting melanoma patients whose tumors carry the BRAF v600e mutation for treatment with dabrafenib [tafinlar ] and as an aid in selecting melanoma patients whose tumors carry the BRAF v600e or v600k mutation for treatment with trametinib [mekinist]. [FTCID:P120014]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 699 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000442/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Signal Peptide dataset contains sarscov1 [taxid:694009] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 690 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000442/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Signal Peptide dataset contains sarscov1 [taxid:694009] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 690 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000458/1.0", - "usability_domain": [ - "Using ML to predict moonlighting DNA-binding proteins (mDBPs) from other DNA-binding proteins (oDBPS) to advance our understanding of multifunctional proteins", - "Made use of publicly available moonlighting databases to develop trainable models", - "Moonlighting have been taken from Moonprot 3.0, MultitaskProtDB-II and MoonDB 2.0 ", - "mDBPs can indeed be predicted from proposed feature sets with reasonable confidence.(feature sets:\n(i) single protein sequence and predicted binding site features, (ii) sequence-based evolutionary features, (iii) network features based on protein\u2013protein interactions, (iv) sequence-predicted secondary structural features and (v) global gene expression profiles.)" - ], - "score": { - "usability_domain_length": 685 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000127/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref RefSeq contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 684 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000128/v-2.0.2", - "usability_domain": [ - "The Mouse UniprotKB Xref RefSeq contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 684 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000127/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref RefSeq contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 684 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000128/v-2.1.1", - "usability_domain": [ - "The Mouse UniprotKB Xref RefSeq contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 684 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000062/1.0", - "usability_domain": [ - "- Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]\n- Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\n- Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\n- GitHub CWL example: https://github.com/mr-c/hive-cwl-examples/blob/master/workflow/hive-viral-mutation-detection.cwl#L20" - ], - "score": { - "usability_domain_length": 682 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000653/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Signal Peptide dataset contains fruitfly [taxid:7227] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. Signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 682 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000653/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Signal Peptide dataset contains fruitfly [taxid:7227] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and PMIDs. The dataset contains related files in the output subdomain that includes processed protein sequence files in fasta format viz. Signal peptide sequence, protein sequence after signal peptide is cleaved, and full protein sequences of the proteins that contain signal peptide. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 682 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000059/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] n-glycans attached to serum proteins expressed by liver tissue for patients with cirrhosis, hepatocellular carcinoma, or transplant. The measured glycan data were generated in collaboration with the IMAT Glyco-typer project and mapped to UniProtKB accessions of serum glycoproteins, Uberon anatomical entity IDs, glycan composition (m/z value-based), and GlyTouCan IDs. The dataset displays normalized intensity values (quantities) of specific n-glycans detected by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for specific serum proteins in hepatocellular carcinoma, cirrhosis, and liver transplant patient cohorts." - ], - "score": { - "usability_domain_length": 680 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000693/v-2.0.2", - "usability_domain": [ - "The dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Gene and Drug Landing Page Aggregator resource. If you use this dataset please provide proper attribution to Gene and Drug Landing Page Aggregator and GlyGen", - "Gene and Drug Landing Page Aggregator (GDLPA) has links to 53 gene, 18 variant and 19 drug repositories that provide direct links to gene and drug landing pages. You can search by gene or drug name and then choose the sites that contain knowledge about your gene or drug of interest. Resources supported by the NIH Common Fund are listed first and have the CFDE logo at their top right corner - https://cfde-gene-pages.cloud/." - ], - "score": { - "usability_domain_length": 674 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000693/v-2.1.1", - "usability_domain": [ - "The dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Gene and Drug Landing Page Aggregator resource. If you use this dataset please provide proper attribution to Gene and Drug Landing Page Aggregator and GlyGen", - "Gene and Drug Landing Page Aggregator (GDLPA) has links to 53 gene, 18 variant and 19 drug repositories that provide direct links to gene and drug landing pages. You can search by gene or drug name and then choose the sites that contain knowledge about your gene or drug of interest. Resources supported by the NIH Common Fund are listed first and have the CFDE logo at their top right corner - https://cfde-gene-pages.cloud/." - ], - "score": { - "usability_domain_length": 674 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000335/v-2.0.2", - "usability_domain": [ - "The HCV1a [TaxID:1108] glycosylations sites dataset contains glycosylation sites experimentally reported on Hepatitis C Virus 1a protein. The data was extracted from publication PMID:18187336. (Iacob RE, Perdivara I, Przybylski M, Tomer KB. Mass spectrometric characterization of glycosylation of hepatitis C virus E2 envelope glycoprotein reveals extended microheterogeneity of N-glycans. J Am Soc Mass Spectrom. 2008;19(3):428\u2013444. doi:10.1016/j.jasms.2007.11.022). The glycan compositions from publication [PMID:18187336] were assigned to GlyTouCan accessions by UniCarbKB. [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]" - ], - "score": { - "usability_domain_length": 671 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.6", - "usability_domain": [ - "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 671 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000335/v-2.1.1", - "usability_domain": [ - "The HCV1a [TaxID:1108] glycosylations sites dataset contains glycosylation sites experimentally reported on Hepatitis C Virus 1a protein. The data was extracted from publication PMID:18187336. (Iacob RE, Perdivara I, Przybylski M, Tomer KB. Mass spectrometric characterization of glycosylation of hepatitis C virus E2 envelope glycoprotein reveals extended microheterogeneity of N-glycans. J Am Soc Mass Spectrom. 2008;19(3):428\u2013444. doi:10.1016/j.jasms.2007.11.022). The glycan compositions from publication [PMID:18187336] were assigned to GlyTouCan accessions by UniCarbKB. [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]" - ], - "score": { - "usability_domain_length": 671 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.7", - "usability_domain": [ - "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 670 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.9", - "usability_domain": [ - "BioSample metadata extracted from the original FDA ARGOS BioProject in NCBI", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from the BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific BioSamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 670 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000625/v-2.1.1", - "usability_domain": [ - "This is a Human Protein Biomarkers dataset. It contains human [taxonomy:9606] protein biomarker information, including cross-mapped UniProtKB or Protein Ontology accessions, assessed biomarker entity, biomarker status, biomarker type, specimen type, LOINC code, disease name, and source evidence from the cancer biomarker portal (https://data.oncomx.org/cancerbiomarkers). This dataset can be used to obtain biomarker information for 19 cancer types and COVID-19. If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite: PMID:32142370, PMID:34015823, and PMID:31616925. The dataset was earlier called as human_protein_cancer_biomarkers.csv" - ], - "score": { - "usability_domain_length": 669 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.3", - "usability_domain": [ - "BioSample metadata extracted from the original FDA BioProject in NCBI. ", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set (BioSample data under the BioProject: PRJNA231221), NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the data associated with specific biosamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 664 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000038/1.3", - "usability_domain": [ - "Assembly quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", - "\nFor this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all assembly IDs from the selected BioProject (PRJNA231221). Once we had the list of assembly ids, we constructed a CURL command to download each of the assembly XML files containing the assembly quality control data. The resulting file contains quality control data for all assemblies in the BioProject (PRJNA231221). The primary use case for this data set is to provide a file that contains quality control data for all assemblies in the selected BioProject." - ], - "score": { - "usability_domain_length": 663 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000038/1.4", - "usability_domain": [ - "Assembly quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", - "\nFor this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all assembly IDs from the selected BioProject (PRJNA231221). Once we had the list of assembly ids, we constructed a CURL command to download each of the assembly XML files containing the assembly quality control data. The resulting file contains quality control data for all assemblies in the BioProject (PRJNA231221). The primary use case for this data set is to provide a file that contains quality control data for all assemblies in the selected BioProject." - ], - "score": { - "usability_domain_length": 663 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000457/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Signal Peptide dataset contains mouse [taxid:10090] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 662 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000628/v-2.0.2", - "usability_domain": [ - "This is a SARS-CoV1 Glycosylation Sites (UniCarbKB) dataset. It contains a list of SARS coronavirus (SARS-CoV-1) [taxonomy:694009] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB. The listed proteins (UniProtKB accession) are part of the GlyGen UniProtKB SARS CoV1 Proteome Masterlist (https://data.glygen.org/GLY_000467). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:24234447, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining a list of glycosylation sites for canonical glycoproteins from the UnicarbKB database." - ], - "score": { - "usability_domain_length": 662 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000457/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Signal Peptide dataset contains mouse [taxid:10090] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 662 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000628/v-2.1.1", - "usability_domain": [ - "This is a SARS-CoV1 Glycosylation Sites (UniCarbKB) dataset. It contains a list of SARS coronavirus (SARS-CoV-1) [taxonomy:694009] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB. The listed proteins (UniProtKB accession) are part of the GlyGen UniProtKB SARS CoV1 Proteome Masterlist (https://data.glygen.org/GLY_000467). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:24234447, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining a list of glycosylation sites for canonical glycoproteins from the UnicarbKB database." - ], - "score": { - "usability_domain_length": 662 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000458/v-2.0.2", - "usability_domain": [ - "The Rat Protein Signal Peptide dataset contains rat [taxid:10116] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 658 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000458/v-2.1.1", - "usability_domain": [ - "The Rat Protein Signal Peptide dataset contains rat [taxid:10116] signal peptide information for the UniProtKB canonical accessions. The dataset contains protein information, signal peptide sequence, signal peptide length, signal peptide start and end position, cleaved protein sequence, eco ids and pmids. The dataset contains related files in the output subdomain that includes log file containing accessions that do not have signal peptide information and processed protein sequence\u2019s files in fasta format. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 658 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000284/v-2.0.2", - "usability_domain": [ - "The dataset provides information on biosynthetic enzymes (UniProtKB Ac.) associated to the glycan structures (GlyTouCan Accessions). The enzymes listed are human or mouse proteins that are associated with the canonical monosaccharides. The association of a glycan structure to the canonical monosaccharides are based on the Glyco tree model. The glycans listed in the database may or may not have a human (TaxID:9606) or mouse (TaxID:10090) species annotation from the source (GlyTouCan or UniCarbKB) database. Please note that a human or mouse biosynthetic enzyme association should not be considered as a human or mouse species annotation for that glycan." - ], - "score": { - "usability_domain_length": 657 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000284/v-2.1.1", - "usability_domain": [ - "The dataset provides information on biosynthetic enzymes (UniProtKB Ac.) associated to the glycan structures (GlyTouCan Accessions). The enzymes listed are human or mouse proteins that are associated with the canonical monosaccharides. The association of a glycan structure to the canonical monosaccharides are based on the Glyco tree model. The glycans listed in the database may or may not have a human (TaxID:9606) or mouse (TaxID:10090) species annotation from the source (GlyTouCan or UniCarbKB) database. Please note that a human or mouse biosynthetic enzyme association should not be considered as a human or mouse species annotation for that glycan." - ], - "score": { - "usability_domain_length": 657 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_013112/0.1", - "usability_domain": [ - "This pipeline enables the complete identification and sequencing of all coding/non-coding RNA species in human genome in one sequencing run.", - "The data used to validate this pipeline are simulated. Text link to simulated dataset is included input field.", - "Final output Fastq files were checked for anti-sense reads using custom python scripts (not released)", - "the original pipeline can be accessed https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-S7-S10/figures/1", - "Gene annotation parameters available at http://bmcbioinformatics.biomedcentral.com/articles/101.1186/1471-2105-14-S7-S10", - "Protocol patent pending RM2010A000293-PCT/IB2011/052369" - ], - "score": { - "usability_domain_length": 654 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_013112/1.0", - "usability_domain": [ - "This pipeline enables the complete identification and sequencing of all coding/non-coding RNA species in human genome in one sequencing run.", - "The data used to validate this pipeline are simulated. Text link to simulated dataset is included input field.", - "Final output Fastq files were checked for anti-sense reads using custom python scripts (not released)", - "the original pipeline can be accessed https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-S7-S10/figures/1", - "Gene annotation parameters available at http://bmcbioinformatics.biomedcentral.com/articles/101.1186/1471-2105-14-S7-S10", - "Protocol patent pending RM2010A000293-PCT/IB2011/052369" - ], - "score": { - "usability_domain_length": 654 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000038/1.2", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA for the current FDA ARGOS BioProject.", - "For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 652 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000225/v-2.0.2", - "usability_domain": [ - "The Human Diseases dataset contains list of human [taxid:9606] diseases as described by OMIM and Monarch Initiative database. ", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship. ", - "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest. ", - "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, EMBL-EBI-UniProt and GlyGen. " - ], - "score": { - "usability_domain_length": 651 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000225/v-2.1.1", - "usability_domain": [ - "The Human Diseases dataset contains list of human [taxid:9606] diseases as described by OMIM and Monarch Initiative database. ", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship. ", - "The Monarch Initiative is focused primarily on phenotype-related resources and brings in data associated with those phenotypes so that the users can begin to make connections among other biological entities of interest. ", - "If you are using this dataset please give proper attribution to OMIM, Monarch Initiative, EMBL-EBI-UniProt and GlyGen. " - ], - "score": { - "usability_domain_length": 651 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000109/1.0.36", - "usability_domain": [ - "The therascreen PIK3CA RGQ PCR Kit is a real-time qualitative PCR test for the detection of 11 mutations in the PIK3CA gene (Exon 7: C420R; Exon 9: E542K, E545A, E545D [1635G>T only], E545G, E545K, Q546E, Q546R; and Exon 20: H1047L, H1047R, H1047Y) using genomic DNA (gDNA) extracted from formalin-fixed, paraffin-embedded (FFPE) breast tumor tissue or circulating tumor DNA (ctDNA) from plasma derived from K2EDTA anticoagulated peripheral whole blood ... The test is intended to aid clinicians in identifying breast cancer patients who may be eligible for treatment with PIQRAY (alpelisib) based on a PIK3CA Mutation Detected result.FTCID:P190001" - ], - "score": { - "usability_domain_length": 648 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000009/1.6", - "usability_domain": [ - "Quality control metrics extracted from NCBI's SRA run data.", - "For this data set, we ran the NCBI eutils efetch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProject PRJNA231221. This returned 3657 Ids. Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 647 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000459/1.2.1", - "usability_domain": [ - "nfcore/chipseq is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data.", - "For use with multiple replicates, the group identifier should be identical when you have multiple replicates from the same experimental group, just increment the replicate identifier appropriately. The first replicate value for any given experimental group must be 1.", - "Both the group and replicate identifiers should be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. " - ], - "score": { - "usability_domain_length": 646 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000465/1.2.1", - "usability_domain": [ - "nfcore/chipseq is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data.", - "For use with multiple replicates, the group identifier should be identical when you have multiple replicates from the same experimental group, just increment the replicate identifier appropriately. The first replicate value for any given experimental group must be 1.", - "Both the group and replicate identifiers should be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. " - ], - "score": { - "usability_domain_length": 646 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000038/1.1", - "usability_domain": [ - "Quality control data extracted from NCBI's SRA for the FDA ARGOS BioProject", - "For this data set, we ran the NCBI eutils esearch (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all SRA Run IDs from the selected BioProjects (PRJNA231221). Once we had the list of SRA run ids, we constructed a CURL command to download each of the SRR XML files containing the summary data. The resulting file contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the quality of specific SRR run files, or a subset of the files referenced in this list." - ], - "score": { - "usability_domain_length": 643 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000701/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniprotKB Xref RefSeq contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 635 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000701/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniprotKB Xref RefSeq contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein. " - ], - "score": { - "usability_domain_length": 635 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000092/1.0.36", - "usability_domain": [ - "A0396 is a panel of identified gene predictive biomarkers (mutations) in lung cancer (DOID:1324). The genes are ALK (UPKB:Q9UM73), CDK4 (UPKB:P11802), DDR2 (UPKB:Q16832), MAP2K1 (UPKB:Q02750), MAP2K2 (UPKB:Q02750), EGFR (UPKB:P00533), FGFR2 (UPKB:P21802), FGFR3 (UPKB:P22607), HRAS (UPKB:P01112), KRAS (UPKB:P01116), NRAS (UPKB:P01111), MET (UPKB:P08581), KIT (UPKB:P10721), PIK3CA (UPKB:P42336), PGFRA (UPKB:P16234), RET (UPKB:P07949), ROS1 (UPKB:P08922), ATK1 (UPKB:P31749), RAF1 (UPKB:P04049), ERBB2 (UPKB:P04626), ERBB3 (UPKB:P21860), BRAF (UPKB:P15056), MTOR (UPKB:P42345). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 634 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000378/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Virus Pathogen Resource (ViPR) contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to ViPR database accessions/identifiers.", - "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." - ], - "score": { - "usability_domain_length": 634 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000379/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Virus Pathogen Resource contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to ViPR database accessions/identifiers.", - "The Virus Pathogen Database and Analysis Resource (ViPR) is an integrative and comprehensive publicly available database and analysis resource to search, analyze, visualize, save and share data for viral pathogens in the U.S. National Institute of Allergy and Infectious Diseases (NIAID) Category A-C Priority Pathogen lists for biodefense research, and other viral pathogens causing emerging/reemerging infectious diseases." - ], - "score": { - "usability_domain_length": 634 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000308/v-2.0.2", - "usability_domain": [ - "The dataset provides Genes involved in the Congenital disorders of glycosylation from Genomics England PanelApp ( PanelApp crowdsources expert knowledge to establish consensus diagnostic gene panels. Antonio Rueda Martin and Eleanor Williams, Rebecca E. Foulger, Sarah Leigh, Louise C. Daugherty, Olivia Niblock, Ivone U. S. Leong, Katherine R. Smith, Oleg Gerasimenko, Eik Haraldsdottir, Ellen Thomas, Richard H. Scott, Emma Baple, Arianna Tucci, Helen Brittain, Anna de Burca, Kristina Iba\u00f1ez, Dalia Kasperaviciute, Damian Smedley, Mark Caulfield, Augusto Rendon & Ellen M. McDonagh. Nat Genet (2019) doi:10.1038/s41588-019-0528-2)" - ], - "score": { - "usability_domain_length": 633 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000308/v-2.1.1", - "usability_domain": [ - "The dataset provides Genes involved in the Congenital disorders of glycosylation from Genomics England PanelApp ( PanelApp crowdsources expert knowledge to establish consensus diagnostic gene panels. Antonio Rueda Martin and Eleanor Williams, Rebecca E. Foulger, Sarah Leigh, Louise C. Daugherty, Olivia Niblock, Ivone U. S. Leong, Katherine R. Smith, Oleg Gerasimenko, Eik Haraldsdottir, Ellen Thomas, Richard H. Scott, Emma Baple, Arianna Tucci, Helen Brittain, Anna de Burca, Kristina Iba\u00f1ez, Dalia Kasperaviciute, Damian Smedley, Mark Caulfield, Augusto Rendon & Ellen M. McDonagh. Nat Genet (2019) doi:10.1038/s41588-019-0528-2)" - ], - "score": { - "usability_domain_length": 633 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000092/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref BioMuta contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BioMuta database accessions/identifiers. BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates. The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 632 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000092/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref BioMuta contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BioMuta database accessions/identifiers. BioMuta is a single-nucleotide variation (SNV) and disease association database where variations are mapped to genomes and RefSeq nucleotide entries, and unified through UniProtKB/Swiss-Prot positional coordinates. The current version of BioMuta contains only nonsynonymous single-nucleotide variations (nsSNVs) associated with cancer. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 632 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_018406/1.0", - "usability_domain": [ - "Next generation sequencing data is used to characterize the genetic background of genetically engineered mice through the use of variant identification. This pipeline follows the anaylsis of publically available RNA sequencing data. The process will begin with alignement and complete at the point where KO-linked variants are identified. Farkas, C., Fuentes-Villalobos, F., Rebolledo-Jaramillo, B. et al. Streamlined computational pipeline for genetic background characterization of genetically engineered mice based on next generation sequencing data. BMC Genomics 20, 131 (2019). https://doi.org/10.1186/s12864-019-5504-9" - ], - "score": { - "usability_domain_length": 625 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_067092/1.0", - "usability_domain": [ - "Pipeline for identifying copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods. Approximately 25 individuals were randomly chosen from each of the CEU, YRI, CHB, JPT, MXL, CLM, PUR, ASW, LWK, CHS, TSI, IBS, FIN, and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" - ], - "score": { - "usability_domain_length": 621 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000049/1.0", - "usability_domain": [ - "Pipeline for identifying copy number of genetic sequences independent of the genes in which they occur, and with higher fidelity than existing methods. Approximately 25 individuals were randomly chosen from each of the CEU, YRI, CHB, JPT, MXL, CLM, PUR, ASW, LWK, CHS, TSI, IBS, FIN, and BGR populations for a total of 324 individuals. Where domains were more than 1 kb apart, the boundaries of the domains were extended up to 250 bp to allow the possibility of capturing unique sequence directly adjacent to the domain. This example pipeline was created based on the work of Astling et al. doi: 10.1186/s12864-017-3976-z" - ], - "score": { - "usability_domain_length": 621 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000369/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref IntAct dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 619 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000369/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref IntAct dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 619 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000355/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Citations dataset contains publication information for hcv1a [taxid:11108] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/hcv1a_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 616 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000355/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Citations dataset contains publication information for hcv1a [taxid:11108] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/hcv1a_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 616 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.0", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.1", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.2", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.3", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000368/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref IntAct dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000630/v-2.0.2", - "usability_domain": [ - "The Fruitfly Glycosylation Sites (GlyConnect) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported glycans in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Drosophila melanogaster). If you use this dataset please provide proper attribution to GlyConnect and GlyGen. Please note that this dataset does not contain protein or sites information just the glycans" - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.4", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000368/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref IntAct dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000630/v-2.1.1", - "usability_domain": [ - "The Fruitfly Glycosylation Sites (GlyConnect) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported glycans in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Drosophila melanogaster). If you use this dataset please provide proper attribution to GlyConnect and GlyGen. Please note that this dataset does not contain protein or sites information just the glycans" - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000011/1.5", - "usability_domain": [ - "Positional QC process.", - "This workflow describes the process of evaluating granular, positional level quality evaluatoin by using an annotation file that describes genomic features. Data are ingested into HIVE in the form of SRAs associated with a particular assembly, extracted to fastq format with the SRA Toolkit, aligned to assembly genome, and evaluated for SNVs. Alignment is carried out using HIVE Hexagon, and variant calling is carried out using Heptagon. After variant calling, data are evaluated in the context of a GenBank annotation file. The primary use case for this data set is explore site QC data." - ], - "score": { - "usability_domain_length": 612 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000038/1.0", - "usability_domain": [ - "This algorithm was originally developed at the Center for Biologics Evaluation and Research to assist in safety prediction of aluminum containing infant vaccines. A full description of the algorithm is published in the journal \u201cvaccine\u201d (Mitkus et al. 2011). The algorithm is capable of creating aluminum pharmacokinetic profiles for infant following recommended vaccination schedule by the Advisory Committee on Immunization Practices of the Centers for Disease Control and Prevention. Also it predicts aluminum safety threshold based on the level set by the Agency for Toxic Substances and Disease Registry." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.0", - "usability_domain": [ - "BioSample metadata extracted from SRA using the HIVE Lab workflow.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.1", - "usability_domain": [ - "BioSample metadata extracted from SRA using the HIVE Lab workflow.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000101/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref GeneCards contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to GeneCards database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " GeneCards is a searchable, integrative database that provides comprehensive, user-friendly information on all annotated and predicted human genes. It automatically integrates gene-centric data from ~150 web sources, including genomic, transcriptomic, proteomic, genetic, clinical and functional information." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000091/1.0.36", - "usability_domain": [ - "A0395 Gene expression levels (measured by the Ct value determined when the fluorescent signal exceeds a pre-defined threshold limit. If the external controls are valid, then the Ct value for each gene marker in the patient sample is compared to marker-specific Ct cutoff values. Samples with Ct values less than or equal to one or both of the cutoff values for MG or CKi9 are considered positive. The Cutoff Ct values are as follows: MG < 31, CK19 < 30, Internal Control < 36.) for breast cancer metastasis. The genes are MG (NA) and KRT19 (UPKB:P08727). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000101/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref GeneCards contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to GeneCards database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " GeneCards is a searchable, integrative database that provides comprehensive, user-friendly information on all annotated and predicted human genes. It automatically integrates gene-centric data from ~150 web sources, including genomic, transcriptomic, proteomic, genetic, clinical and functional information." - ], - "score": { - "usability_domain_length": 610 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_022530/2.2", - "usability_domain": [ - "Identify treatment emergent amino acid substitutions[so:0000048] that correlate with antiviral drug treatment failure", - "This biocompute object is the BCO proof of concept for FDA data submission process. BCO is intended to facilitate data analysis and communication during FDA data submission. We have generated mock HCV patient data to mimic a real clinical trial FDA submission and confirm if BCO facilitates the submission process. BCO elucidates potential dicordant results between prirmary data analyis and FDA data re-analysis", - "Clinical trial description can be found at: URI: https://clinicaltrials.gov/" - ], - "score": { - "usability_domain_length": 605 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000498/v-2.0.2", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for human [taxid:9606] proteins (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB human glycoprotein dataset (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019). " - ], - "score": { - "usability_domain_length": 605 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000498/v-2.1.1", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for human [taxid:9606] proteins (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197, https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB human glycoprotein dataset (https://data.glygen.org/GLYDS000040: v-1.4.5 09/13/2019). " - ], - "score": { - "usability_domain_length": 605 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000536/v-2.0.2", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for mouse [taxid:10090] proteins (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB mouse glycoprotein dataset (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)" - ], - "score": { - "usability_domain_length": 604 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000536/v-2.1.1", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for mouse [taxid:10090] proteins (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB mouse glycoprotein dataset (https://data.glygen.org/GLYDS000041: v-1.4.5 09/13/2019)" - ], - "score": { - "usability_domain_length": 604 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000448/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref IntAct dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 603 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000448/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref IntAct dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal." - ], - "score": { - "usability_domain_length": 603 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000276/v-2.0.2", - "usability_domain": [ - "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 601 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000276/v-2.1.1", - "usability_domain": [ - "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to RefSeq database accessions/identifiers. The script extracts one-to-one UniProtKB isoform ac to RefSeq ac and also UniProtKB ac to RefSeq ac where there is no UniProtKB isoforms other than the canonical ac. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI RefSeq is a comprehensive, integrated, non-redundant, well-annotated set of reference sequences including genomic, transcript, and protein." - ], - "score": { - "usability_domain_length": 601 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000537/v-2.0.2", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for rat [taxid:10116] proteins (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB rat glycoprotein dataset (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)" - ], - "score": { - "usability_domain_length": 600 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000537/v-2.1.1", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information retrieved from the UniCarbKB database for rat [taxid:10116] proteins (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)Source- PRO: https://proconsortium.org/pro.shtml, UniCarbKB: https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128. Please note that this dataset refers to the previous version of the UniCarbKB rat glycoprotein dataset (https://data.glygen.org/GLYDS000221: v-1.4.5 09/13/2019)" - ], - "score": { - "usability_domain_length": 600 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000541/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 596 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000541/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 596 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000010/1.2", - "usability_domain": [ - "BioSample metadata extracted from SRA.", - "NCBI's SRA BiosampleDB has a great deal of metadata collected upon submission. For this data set NCBI eutils esearch was run (https://www.ncbi.nlm.nih.gov/books/NBK25499/) to retrieve all biosample metadata XML files from a BioProject. The metadata was parsed resulting in this table which contains identifiers linking each record to multiple other NCBI databases. The primary use case for this data set is to provide a method to quickly evaluate the metrics associated with specific biosamples, or a subset of the files referenced in this set." - ], - "score": { - "usability_domain_length": 582 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000447/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref NCBI Gene dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 582 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000447/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref NCBI Gene dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 582 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000036/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Citations dataset contains publication information for mouse [taxid:10090] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/mouse_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 581 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000103/1.0.36", - "usability_domain": [ - "A0407 is a TOP2A FISH pharmDx Kit is designed to detect amplifications and deletions (copy number changes) of the TOP2A gene using fluorescence in situ hybridization (FISH) technique on formalin- fixed, paraffin-embedded human breast cancer tissue specimens. Deletions and amplifications of the TOP2A gene serve as a marker for poor prognosis in high-risk breast cancer patients. Results from the TOP2A FISH pharmDx MT Kit are intended for use as an adjunct to existing clinical and pathological information. [FTCID:P050045]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 581 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000036/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Citations dataset contains publication information for mouse [taxid:10090] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/mouse_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 581 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000035/v-2.0.2", - "usability_domain": [ - "The Human Protein Citations dataset contains publication information for human [taxid:9606] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/human_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 578 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000233/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref IntAct dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 578 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000035/v-2.1.1", - "usability_domain": [ - "The Human Protein Citations dataset contains publication information for human [taxid:9606] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/human_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 578 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000233/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref IntAct dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 578 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000236/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref IntAct dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 577 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000253/v-2.0.2", - "usability_domain": [ - "The Rat Protein Citations dataset contains publication information for rat [taxid:10116] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/rat_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 577 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000236/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref IntAct dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 577 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000253/v-2.1.1", - "usability_domain": [ - "The Rat Protein Citations dataset contains publication information for rat [taxid:10116] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/rat_protein_blacklisted_pmids_uniprotkb.csv. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 577 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000263/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref IntAct dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 574 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000263/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref IntAct dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 574 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000530/v-2.0.2", - "usability_domain": [ - "The SARS-CoV2 Glycosylation Sites (GlyConnect) dataset contains SARS-CoV2 [taxid:2697049] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Severe acute respiratory syndrome coronavirus 2 (2019-nCoV)). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 572 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000530/v-2.1.1", - "usability_domain": [ - "The SARS-CoV2 Glycosylation Sites (GlyConnect) dataset contains SARS-CoV2 [taxid:2697049] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Severe acute respiratory syndrome coronavirus 2 (2019-nCoV)). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 572 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000028/1.1", - "usability_domain": [ - "BioSample metadata is manually extracted from NCBI's BioSample database.", - "This dataset is one of the four 'core' tables produced by the Crandall Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples." - ], - "score": { - "usability_domain_length": 570 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000028/1.0", - "usability_domain": [ - "BioSample metadata is manually extracted from NCBI's BioSample database.", - "This dataset is one of the four 'core' tables produced by the Crandall Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" - ], - "score": { - "usability_domain_length": 569 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000020/1.0", - "usability_domain": [ - "BioSample metadata is manually extracted from NCBI's BioSample database.", - "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" - ], - "score": { - "usability_domain_length": 565 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000020/1.1", - "usability_domain": [ - "BioSample metadata is manually extracted from NCBI's BioSample database.", - "This dataset is one of the four 'core' tables produced by the HIVE Lab for the ARGOS Project, and is intended to provide sample related metadata for the NGS files selected and analyzed in the three other core tables (ngsQC, siteQC, and assemblyQC). The primary use case for this data set is to track sample metadata associated with raw sequencing files selected for further QC, and a secondary use case is to provide a method to quickly evaluate the metrics associated with specific biosamples" - ], - "score": { - "usability_domain_length": 565 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000583/v-2.0.2", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 564 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000625/v-2.0.2", - "usability_domain": [ - "This is a Human Protein Cancer Biomarkers dataset. It contains human [taxonomy:9606] cancer biomarker information including biomarker main x-ref mapped to UniProtKB, assessed biomarker entity, BEST biomarker type, literature evidence, disease name. and LOINC code from the cancer biomarker portal (https://data.oncomx.org/cancerbiomarkers). If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite:PMID:32142370, PMID:34015823, and PMID:31616925. This dataset can be used for obtaining cancer biomarker information for 19 cancer types." - ], - "score": { - "usability_domain_length": 564 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000583/v-2.1.1", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 564 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000582/v-2.0.2", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 563 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000582/v-2.1.1", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 563 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000584/v-2.0.2", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 562 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000386/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniprotKB Xref BRENDA contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 562 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000386/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniprotKB Xref BRENDA contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 562 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000584/v-2.1.1", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). This file contains rows that are detected from automatic literature mining and which failed the automatic QC where the annotation (protein+site) didn't already exist in GlyGen. such rows are then manually verified by curators. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 562 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000371/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref InterPro contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 556 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000396/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for mouse [taxid:10090] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 556 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000371/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref InterPro contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 556 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000396/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for mouse [taxid:10090] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 556 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000395/v-2.0.2", - "usability_domain": [ - "The Human Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for human [taxid:9606] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 555 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000395/v-2.1.1", - "usability_domain": [ - "The Human Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for human [taxid:9606] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 555 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_046954/1.0", - "usability_domain": [ - "Pipeline for identifying expression counts from three maize genome assemblies and corresponding annotations, which are B73, PH207 and W22 .Iin each dataset, the tissue samples were chosen to broadly capture variation. Two biological replicates per genotype/tissue combination and standard, non-stranded RNAseq libraries were prepared and sequenced on Illumina HiSeq 2500, using 50\u2009bp SE reads, avg. number of reads\u2009=\u200930.5 million. file:///dev/tmpfs was used for the file IOs since some steps were ran on the command line. doi: 10.1186/s12864-020-6696-8." - ], - "score": { - "usability_domain_length": 554 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000450/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref InterPro contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 554 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000450/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref InterPro contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 554 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000397/v-2.0.2", - "usability_domain": [ - "The Rat Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for rat [taxid:10116] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot.. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 553 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000104/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref NCBI Gene dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 553 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000104/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref NCBI Gene dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 553 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000397/v-2.1.1", - "usability_domain": [ - "The Rat Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for rat [taxid:10116] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot.. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 553 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000105/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref NCBI Gene dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " - ], - "score": { - "usability_domain_length": 552 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000105/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref NCBI Gene dataset contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " - ], - "score": { - "usability_domain_length": 552 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000272/2.9", - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]", - "Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure", - "Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus" - ], - "score": { - "usability_domain_length": 551 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000370/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref InterPro contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 549 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000370/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref InterPro contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 549 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000032/1.0", - "usability_domain": [ - "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. ", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 548 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000144/1.0", - "usability_domain": [ - "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. ", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 548 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000032/1.2", - "usability_domain": [ - "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference protein coding sequences. ", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 548 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000238/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref NCBI Gene dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " - ], - "score": { - "usability_domain_length": 548 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000238/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref NCBI Gene dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide. " - ], - "score": { - "usability_domain_length": 548 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000449/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref InterPro contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 540 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000449/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref InterPro contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 540 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000102/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref HGNC dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to HGNC accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The HUGO Gene Nomenclature Committee is a committee of the Human Genome Organization that sets the standards for human gene nomenclature. The HGNC approves a unique and meaningful name for every known human gene, based on a query of experts. " - ], - "score": { - "usability_domain_length": 535 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000102/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref HGNC dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to HGNC accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The HUGO Gene Nomenclature Committee is a committee of the Human Genome Organization that sets the standards for human gene nomenclature. The HGNC approves a unique and meaningful name for every known human gene, based on a query of experts. " - ], - "score": { - "usability_domain_length": 535 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000637/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Citations dataset contains publication information for fruitfly [taxid:7227] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/fruitfly_protein_blacklisted_pmids_uniprotkb.csv. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 534 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000637/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Citations dataset contains publication information for fruitfly [taxid:7227] UniProtKB accessions that includes PMID, journal name, date and author and title. Publications of large scale studies, and high frequency publications pertaining to genome sequencing, cDNA, chromosomes have been filtered out as per https://data.glygen.org/ln2data/releases/data/v-1.12.3/compiled/fruitfly_protein_blacklisted_pmids_uniprotkb.csv. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 534 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000663/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref IntAct dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 529 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000663/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref IntAct dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to IntAct database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "IntAct provides a freely available, open source database system and analysis tools for molecular interaction data. All interactions are derived from literature curation or direct user submissions and are freely available. The IntAct Team also produce the Complex Portal. " - ], - "score": { - "usability_domain_length": 529 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000094/v-2.0.2", - "usability_domain": [ - "The Mouse UniprotKB Xref BRENDA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 526 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000094/v-2.1.1", - "usability_domain": [ - "The Mouse UniprotKB Xref BRENDA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 526 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000093/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref BRENDA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 524 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000093/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref BRENDA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 524 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000016/1.0.25", - "usability_domain": [ - "Comprehensive cancer driver mutations table. The file human_cancer_driver_muts.csv contains a list of gene symbols and ensembl transcript ids mapped to cancer driver mutations predicted by three structural analysis level computational tools. The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations.The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations." - ], - "score": { - "usability_domain_length": 522 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000056/1.0.36", - "usability_domain": [ - "Comprehensive cancer driver mutations table. The file human_cancer_driver_muts.csv contains a list of gene symbols and ensembl transcript ids mapped to cancer driver mutations predicted by three structural analysis level computational tools. The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations.The csv file human_cancer_driver_muts contains ensembl transcript ids, gene symbols, and computationally determined cancer driver mutations." - ], - "score": { - "usability_domain_length": 522 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000270/v-2.0.2", - "usability_domain": [ - "The Rat UniprotKB Xref BRENDA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 521 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000019/1.2", - "usability_domain": [ - "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. ", - "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. " - ], - "score": { - "usability_domain_length": 521 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000019/1.3", - "usability_domain": [ - "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. ", - "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. " - ], - "score": { - "usability_domain_length": 521 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000270/v-2.1.1", - "usability_domain": [ - "The Rat UniprotKB Xref BRENDA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 521 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000019/1.4", - "usability_domain": [ - "NGS QC metrics generated in the HIVE platform via the ngsQC protocol. ", - "This dataset was generated to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. The data was analyzed by Mazumder and Crandall Labs. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. The secondary use case is assisting in the selection of quality fastq files for genome assembly. " - ], - "score": { - "usability_domain_length": 521 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000090/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref Bgee dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 519 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000090/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref Bgee dataset contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 519 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000004/1.3", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure, map to gene sequences, and more.." - ], - "score": { - "usability_domain_length": 518 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_022531/2.2", - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]. Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure. Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus." - ], - "score": { - "usability_domain_length": 516 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000331/v-2.0.2", - "usability_domain": [ - "The Rat Glycosylation Sites (GlyConnect) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Rattus Norvegicus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 516 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000266/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref Bgee dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - " Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 516 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000266/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref Bgee dataset contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - " Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 516 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000331/v-2.1.1", - "usability_domain": [ - "The Rat Glycosylation Sites (GlyConnect) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Rattus Norvegicus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 516 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000004/1.0", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000004/1.1", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000004/1.2", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000118/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref PANTHER contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000107/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref InterPro contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000330/v-2.0.2", - "usability_domain": [ - "The Mouse Glycosylation Sites (GlyConnect) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Mus musculus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000107/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref InterPro contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000118/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref PANTHER contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000330/v-2.1.1", - "usability_domain": [ - "The Mouse Glycosylation Sites (GlyConnect) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Mus musculus). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 515 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000329/v-2.0.2", - "usability_domain": [ - "The Human Glycosylation Sites (GlyConnect) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Homo sapiens). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 514 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000329/v-2.1.1", - "usability_domain": [ - "The Human Glycosylation Sites (GlyConnect) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported glycosylation sites in GlyConnect database (https://glyconnect.expasy.org/; PMID: 30574787).A json file was derived through GlyConnect API (https://glyconnect.expasy.org/api/docs/index.html?url=/api/assets/swagger.json#/Glycosylation/listGlycosylationsAsJson) (Query: taxonomy:Homo sapiens). If you use this dataset please provide proper attribution to GlyConnect and GlyGen." - ], - "score": { - "usability_domain_length": 514 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000232/v-2.0.2", - "usability_domain": [ - "The Rat UniprotKB Xref CAZy contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates. " - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000095/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref CAZy contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000096/v-2.0.2", - "usability_domain": [ - "The Mouse UniprotKB Xref CAZy contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates" - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000095/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref CAZy contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000096/v-2.1.1", - "usability_domain": [ - "The Mouse UniprotKB Xref CAZy contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates" - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000232/v-2.1.1", - "usability_domain": [ - "The Rat UniprotKB Xref CAZy contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "CAZy is a database of Carbohydrate-Active enZymes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates. " - ], - "score": { - "usability_domain_length": 513 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000100/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448; Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 512 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000243/v-2.0.2", - "usability_domain": [ - "The Rat UniprotKB Xref InterPro contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " - ], - "score": { - "usability_domain_length": 512 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000261/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref PANTHER contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " - ], - "score": { - "usability_domain_length": 512 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000243/v-2.1.1", - "usability_domain": [ - "The Rat UniprotKB Xref InterPro contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterize them. " - ], - "score": { - "usability_domain_length": 512 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000261/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref PANTHER contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products. " - ], - "score": { - "usability_domain_length": 512 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000117/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref PANTHER contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000106/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref InterPro contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000139/v-2.0.2", - "usability_domain": [ - "The Mouse Proteoform Citations (UniCarbKB) contains mouse [taxid:10090] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. . The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000106/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref InterPro contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000117/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref PANTHER contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000139/v-2.1.1", - "usability_domain": [ - "The Mouse Proteoform Citations (UniCarbKB) contains mouse [taxid:10090] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. . The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 511 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000091/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref Bgee contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets" - ], - "score": { - "usability_domain_length": 510 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000091/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref Bgee contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets" - ], - "score": { - "usability_domain_length": 510 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000138/v-2.0.2", - "usability_domain": [ - "The Human Proteoform Citations (UniCarbKB) contains human [taxid:9606] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 508 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000138/v-2.1.1", - "usability_domain": [ - "The Human Proteoform Citations (UniCarbKB) contains human [taxid:9606] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 508 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000707/v-2.1.1", - "usability_domain": [ - "This dataset indicates the tool support status of each GlyTouCan accession. The status is required for the GlyCan Detail API. The tools are sandbox, gnome, pdb, gnome_glygen, gnome_glygen_nglycans, gnome_glygen_oglycans, gnome_glycotree_nglycans, and gnome_glycotree_oglycans. PDB support for glycam 3D structures was provided by Rob Woods (Glycam/UGA). Sandbox support was provided by Will York (UGA) and Nathan Edwards (Georgetown). All other tool support data was provided by Nathan Edwards (Georgetown)." - ], - "score": { - "usability_domain_length": 507 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000655/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for fruitfly [taxid:7227] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 506 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000655/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Submitted Names (UniProtKB) dataset contains UniProtKB submitted names for fruitfly [taxid:7227] UniProtKB/TrEMBL proteins accessions. In UniProt, UniProtKB/TrEMBL entries usually have 'Submitted name' which may later be improved by automatic annotation procedures but if not, it remains as provided by the submitter until the entry is manually annotated and integrated to UniProtKB/Swiss-Prot. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 506 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000251/v-2.0.2", - "usability_domain": [ - "The Rat Proteoform Citations (UniCarbKB) contains rat [taxid:10116] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 505 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000662/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref NCBI Gene dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 505 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000251/v-2.1.1", - "usability_domain": [ - "The Rat Proteoform Citations (UniCarbKB) contains rat [taxid:10116] publication information of glycoproteins from UniCarbKB. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID from UniCarbKB. The dataset is contributed by UniCarbKB and processed by GlyGen. If you use this dataset please provide proper attribution to UniCarbKB and GlyGen [UniCarbKB - https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. " - ], - "score": { - "usability_domain_length": 505 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000662/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref NCBI Gene dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to NCBI Gene database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "NCBI Gene integrates information from a wide range of species. A record may include nomenclature, Reference Sequences (RefSeqs), maps, pathways, variations, phenotypes, and links to genome-, phenotype-, and locus-specific resources worldwide." - ], - "score": { - "usability_domain_length": 505 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000001/1.1", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000001/1.2", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000001/1.3", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000001/1.4", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein coding sequences.", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000557/v-2.0.2", - "usability_domain": [ - "Glycan dictionary is a list of terms which describe some structural aspect of the glycan composition or structure. The terms are extracted with a combination of manual and automatic literature mining tools from various sources. It is an on-going process and the current list includes 180 terms..The terms can also be viewed on the GlyGen wikipedia:https://wiki.glygen.org/index.php/Glycan_structure_dictionary. To submit more terms please follow instructions provided on the GlyGen wikipedia page. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000557/v-2.1.1", - "usability_domain": [ - "Glycan dictionary is a list of terms which describe some structural aspect of the glycan composition or structure. The terms are extracted with a combination of manual and automatic literature mining tools from various sources. It is an on-going process and the current list includes 180 terms..The terms can also be viewed on the GlyGen wikipedia:https://wiki.glygen.org/index.php/Glycan_structure_dictionary. To submit more terms please follow instructions provided on the GlyGen wikipedia page. " - ], - "score": { - "usability_domain_length": 498 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000426/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov2 [taxid:2697049] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 495 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000081/1.0.36", - "usability_domain": [ - "A list of identified, differentially expressed human (taxid:9606) predictor protein biomarkers in liver cirrhosis (DOID:5082) and hepatocellular carcinoma (DOID:684). Logged datatypes for biomarkers include supporting literature evidence (source ID and scientific statements); specimen and disease types; the biomarker entity, its measurement modality (e.g., increased expression) and type; and cross reference (programmatic linkage or resource ID) to data resources, models, and codes. [PMID:x]" - ], - "score": { - "usability_domain_length": 495 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000426/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov2 [taxid:2697049] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 495 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000029/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus (strain Musoke-80) reference protein coding sequences", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000180448; strain Kenya/Musoke/1980). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 494 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000029/1.1", - "usability_domain": [ - "Lake Victoria marburgvirus (strain Musoke-80) reference protein coding sequences", - "Cross reference to genes was retrieved using mappings present in proteins that were retrieved using UniProt proteome ID (UniProt ID: UP000180448; strain Kenya/Musoke/1980). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to visualize how protein annotations related to drug resistance mutations, selection pressure and more map to gene sequences." - ], - "score": { - "usability_domain_length": 494 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000488/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Diseases dataset contains disease caused by hcv1b virus [taxid:11116] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." - ], - "score": { - "usability_domain_length": 494 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000488/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Diseases dataset contains disease caused by hcv1b virus [taxid:11116] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." - ], - "score": { - "usability_domain_length": 494 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000381/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 492 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000381/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 492 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000629/v-2.0.2", - "usability_domain": [ - "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in the UniCarbKB SARS-CoV1 Glycosylation Sites dataset (https://data.glygen.org/GLY_000628). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:26940363, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining publication information for UniCarbKB glycosylation data." - ], - "score": { - "usability_domain_length": 490 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000629/v-2.1.1", - "usability_domain": [ - "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in the UniCarbKB SARS-CoV1 Glycosylation Sites dataset (https://data.glygen.org/GLY_000628). If you use this dataset, please provide proper attribution to UniCarbKB, EMBL-EBI-UniProtKB, and GlyGen. Cite: PMID:26940363, PMID:33237286, and PMID:31616925. This dataset can be used for obtaining publication information for UniCarbKB glycosylation data." - ], - "score": { - "usability_domain_length": 490 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000055/1.0", - "usability_domain": [ - "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. ", - "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. Multiple databases are searched to gather information relating to amino acid changes that confer drug resistance within a genome. The information is gathered at the amino acid level and each entry is cross-referenced and verified through publication searches." - ], - "score": { - "usability_domain_length": 489 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000055/1.1", - "usability_domain": [ - "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. ", - "This workflow was created to curate and record protein and gene annotation data for use in the ARGOS DRM pipeline. Multiple databases are searched to gather information relating to amino acid changes that confer drug resistance within a genome. The information is gathered at the amino acid level and each entry is cross-referenced and verified through publication searches." - ], - "score": { - "usability_domain_length": 489 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000487/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Diseases dataset contains disease caused by hcv1a virus [taxid:11108] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." - ], - "score": { - "usability_domain_length": 487 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000487/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Diseases dataset contains disease caused by hcv1a virus [taxid:11108] which is Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma and is mapped to all UniProtKB canonical accessions. The dataset contains Hepatitis C, Liver Cirrhosis and Hepatocellular carcinoma disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to GlyGen." - ], - "score": { - "usability_domain_length": 487 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000380/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 485 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000380/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Hepatitis C Online database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 485 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000425/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov1 [taxid:694009] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 481 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000425/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for sarscov1 [taxid:694009] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 481 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000373/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref PDB contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 479 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000373/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref PDB contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 479 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000101/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000031/0.1", - "usability_domain": [ - "Lake Victoria marburgvirus reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000031/1.1", - "usability_domain": [ - "Lake Victoria marburgvirus reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000031/1.2", - "usability_domain": [ - "Lake Victoria marburgvirus reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448); Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000375/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000279/v-2.0.2", - "usability_domain": [ - "The Rat Glycosyltransferases dataset contains list of rat [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. If you use this dataset please provide proper attribution to GlyGen" - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000294/0.1", - "usability_domain": [ - "This pipeline represents a workflow that identifies SNPs, deletions, and insertions that correspond to reduced antiviral drug efficacy in Hepatitis C virus subtype 1/a. This is first done through an alignment of the viral reads to a reference genome. When the small read sequences are mapped, the identification of SNPs results in a variant map. This resulting SNP map can be contrasted between individuals with or without drug resistance to associate probable causal mutations." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000279/v-2.1.1", - "usability_domain": [ - "The Rat Glycosyltransferases dataset contains list of rat [taxid:9606] glycosyltransferases enzymes with evidence of glycosyltransferase activity defined by one or several of the following criteria: gene ontology (GO) or UniProtKB/Swiss-Prot annotation; classification by CAZY database; inclusion in the GT panel developed by the Consortium of Functional Glycomics (CFG); domains reported by InterPro and Pfam. If you use this dataset please provide proper attribution to GlyGen" - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000375/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 478 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000110/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref OMIM contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMIM database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship." - ], - "score": { - "usability_domain_length": 477 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000452/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref PDB contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 477 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000077/1.0.36", - "usability_domain": [ - " A0131 is a biomarker panel of 20 biomarker entities that enable prognosis of COVID-19 (DOID:0080600) disease course by measuring the change in distribution of 20 urinary peptides. The biomarker panel named COVID20, is composed of 20 endogenous peptides mainly derived from various collagen chains that enable differentiating moderate or severe disease from critical state or death with 83% sensitivity at 100% specificity. Biomarker data retrieved from PubMed [PMID: 32960510]" - ], - "score": { - "usability_domain_length": 477 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000110/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref OMIM contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMIM database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Online Mendelian Inheritance in Man (OMIM) is a continuously updated catalog of human genes and genetic disorders and traits, with a particular focus on the gene-phenotype relationship." - ], - "score": { - "usability_domain_length": 477 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000452/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref PDB contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 477 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000658/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniprotKB Xref BRENDA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 476 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000454/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref Pfam contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 476 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000454/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref Pfam contains sarscov2 [taxid:2697049] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 476 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000658/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniprotKB Xref BRENDA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to BRENDA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "BRENDA is an information system representing one of the most comprehensive enzyme repositories. It is an electronic resource that comprises molecular and biochemical information on enzymes that have been classified by the IUBMB." - ], - "score": { - "usability_domain_length": 476 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000031/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000180448; Lake Victoria marburgvirus (strain Musoke-80) (MARV) (Marburg virus (strain Kenya/Musoke/1980). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 475 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000116/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref OrthoDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 473 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000116/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref OrthoDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 473 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000372/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref PDB contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000344/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Proteome Materlist dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020_06 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. Please note GlyGen has removed F protein (P0C045 (F_HCV77)) from its masterlist of accessions." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000115/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref OrthoDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000115/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref OrthoDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000344/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Proteome Materlist dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020_06 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. Please note GlyGen has removed F protein (P0C045 (F_HCV77)) from its masterlist of accessions." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000372/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref PDB contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 472 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000257/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref OrthoDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology. " - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000374/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000657/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Bgee dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000480/v-2.0.2", - "usability_domain": [ - "Human Glycosylation Sites [GPTwiki], provided by the Clinical and Translational Glycoscience Research Center (CTGRC), Georgetown University. The database contains list of human [taxid:9606] proteins with information on glycosylation sites and associated glycans from GPTwiki database [https://edwardslab.bmcb.georgetown.edu/gptwiki/Main_Page]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000103/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref MGI contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to MGI database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "MGI is the international database resource for the laboratory mouse, providing integrated genetic, genomic, and biological data to facilitate the study of mouse health and disease." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000103/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref MGI contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to MGI database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "MGI is the international database resource for the laboratory mouse, providing integrated genetic, genomic, and biological data to facilitate the study of mouse health and disease." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000257/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref OrthoDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology. " - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000374/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000480/v-2.1.1", - "usability_domain": [ - "Human Glycosylation Sites [GPTwiki], provided by the Clinical and Translational Glycoscience Research Center (CTGRC), Georgetown University. The database contains list of human [taxid:9606] proteins with information on glycosylation sites and associated glycans from GPTwiki database [https://edwardslab.bmcb.georgetown.edu/gptwiki/Main_Page]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000657/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Bgee dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Bgee database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Bgee is a database to retrieve and compare gene expression patterns in multiple animal species, produced from multiple data types (RNA-Seq, Affymetrix, in situ hybridization, and EST data) and from multiple data sets." - ], - "score": { - "usability_domain_length": 471 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_022118/1.0", - "usability_domain": [ - "BCO Object for interrogating Telomerase RNA Component (TERC) - chromatin interaction. Three libraries were assembled: HiChIRP TERC sample from human female B lymphoblastoid cell line (GM12878) as well as RNase (GM12878) and HeLa cell line TERC-knockout samples as negative controls. Libraries were generated by performing HiChIRP (https://doi.org/10.1038/s41592-019-0407-x) and Illumina paired-end sequencing on the samples. Pipeline steps are repeated for each sample." - ], - "score": { - "usability_domain_length": 469 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000098/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref CDD contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 469 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000097/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref CDD contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 469 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000097/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref CDD contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 469 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000098/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref CDD contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 469 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000234/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref CDD contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins. " - ], - "score": { - "usability_domain_length": 468 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000234/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref CDD contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins. " - ], - "score": { - "usability_domain_length": 468 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_015568/1.0", - "usability_domain": [ - "Pipeline for identifying malarial cell types in images using region-based convolutional neural networks. Deep learning object detection presents another method of identifying cells in different stages of P. vivax development based on training from 1364 annotated images. The outputs of the pipeline are the model weights and validation results. This pipeline was created based on the work of Hung, J., Goodman, A., Ravel, D. et al. doi: 10.1186/s12859-020-03635-x" - ], - "score": { - "usability_domain_length": 466 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000659/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniprotKB Xref CAZy contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." - ], - "score": { - "usability_domain_length": 465 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000414/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Binary Interaction (IntAct) dataset contains sarscov1 [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 465 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000414/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Binary Interaction (IntAct) dataset contains sarscov1 [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 465 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000659/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniprotKB Xref CAZy contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CAZy database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "CAZy is a database of Carbohydrate-Active enZYmes (CAZymes). The database contains a classification and associated information about enzymes involved in the synthesis, metabolism, and recognition of complex carbohydrates." - ], - "score": { - "usability_domain_length": 465 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000664/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref InterPro contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000451/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref PDB contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000668/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref PANTHER contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000451/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref PDB contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000664/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref InterPro contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to InterPro database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "InterPro is a database of protein families, domains and functional sites in which identifiable features found in known proteins can be applied to new protein sequences in order to functionally characterise them." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000668/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref PANTHER contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PANTHER database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The PANTHER classification system is a large curated biological database of gene/protein families and their functionally related subfamilies that can be used to classify and identify the function of gene products." - ], - "score": { - "usability_domain_length": 463 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000005/1.0", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000005/1.1", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000005/1.2", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000005/1.3", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000453/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref Pfam contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000453/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniProtKB Xref Pfam contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models." - ], - "score": { - "usability_domain_length": 462 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000286/v-2.0.2", - "usability_domain": [ - "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000621/v-2.0.2", - "usability_domain": [ - "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000286/v-2.1.1", - "usability_domain": [ - "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000621/v-2.1.1", - "usability_domain": [ - "The dataset provides the monosaccharide compositions for the associated glycans (GlyTouCan Accession). It provides the count for sugar residues such as Hexose (Hex), N-Acetylhexosamine (HexNAc), Deoxyhexose (dHex), N-Acetylneuraminic Acid ( NeuAc), N-Glycolylneuraminic Acid (NeuGc), Other (Xxx). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 460 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000129/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref UniCarbKB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " - ], - "score": { - "usability_domain_length": 459 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000377/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." - ], - "score": { - "usability_domain_length": 459 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000129/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref UniCarbKB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " - ], - "score": { - "usability_domain_length": 459 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000377/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) UniProtKB Xref Pfam contains hcv1b [taxid:11116] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." - ], - "score": { - "usability_domain_length": 459 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000130/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref UniCarbKB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 458 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000130/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref UniCarbKB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 458 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000289/v-2.1.1", - "usability_domain": [ - "The dataset provides the sequences in InChI format and InChI key for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping dataset (https://data.glygen.org/GLY_000305). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). InChI sequences and keys are retrieved from the PubChem ftp site (ftp://ftp.ncbi.nlm.nih.gov/pubchem/Compound/CURRENT-Full/SDF/)." - ], - "score": { - "usability_domain_length": 458 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000109/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref KEGG PATHWAY contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 457 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000109/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref KEGG PATHWAY contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 457 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000259/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref UniCarbKB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000108/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref KEGG PATHWAY contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000250/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref KEGG PATHWAY contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks. " - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000108/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref KEGG PATHWAY contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000250/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref KEGG PATHWAY contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks. " - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000259/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref UniCarbKB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to UniCarbKB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "UniCarbKB is an initiative that aims to promote the creation of an online information storage and search platform for glycomics and glycobiology research. " - ], - "score": { - "usability_domain_length": 456 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000022/1.2", - "usability_domain": [ - "QC of short read sequences re-assembled into genomes.", - "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." - ], - "score": { - "usability_domain_length": 455 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000269/v-2.0.2", - "usability_domain": [ - "The Mouse N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for mouse [taxid:10090] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 455 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000269/v-2.1.1", - "usability_domain": [ - "The Mouse N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for mouse [taxid:10090] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 455 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000401/v-2.0.2", - "usability_domain": [ - "The Human Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for human [taxid:9606] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000376/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000391/v-2.0.2", - "usability_domain": [ - "The Mouse Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for mouse [taxid:10090] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000376/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) UniProtKB Xref Pfam contains hcv1a [taxid:11108] UniProtKB canonical accessions cross-referenced to euHCVdb database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The euHCVdb is mainly oriented towards protein sequence, structure and function analyses and structural biology of HCV." - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000391/v-2.1.1", - "usability_domain": [ - "The Mouse Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for mouse [taxid:10090] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000401/v-2.1.1", - "usability_domain": [ - "The Human Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for human [taxid:9606] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 452 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000272/v-2.0.2", - "usability_domain": [ - "The Rat N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for rat [taxid:10116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 451 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000112/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref neXtProt contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to neXtProt database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "neXtProt is a comprehensive human-centric discovery platform, offering its users a seamless integration of and navigation through protein-related data.", - "" - ], - "score": { - "usability_domain_length": 451 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000112/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref neXtProt contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to neXtProt database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "neXtProt is a comprehensive human-centric discovery platform, offering its users a seamless integration of and navigation through protein-related data.", - "" - ], - "score": { - "usability_domain_length": 451 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000272/v-2.1.1", - "usability_domain": [ - "The Rat N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for rat [taxid:10116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKb accessions. If you use this dataset please provide proper attribution to GlyGen. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 451 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000390/v-2.0.2", - "usability_domain": [ - "The Rat Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for rat [taxid:10116] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 448 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000390/v-2.1.1", - "usability_domain": [ - "The Rat Gene Symbols (UniProtKB) dataset contains UniProtKB gene symbols/names for rat [taxid:10116] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 448 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000002/1.1", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 446 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000002/1.2", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 446 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000002/1.3", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 446 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000002/1.4", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference proteome fasta sequences.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 446 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000006/1.0", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 445 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000006/1.1", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 445 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000006/1.2", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000464024; Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 445 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000675/v-2.0.2", - "usability_domain": [ - "The Fruitfly Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file httpss://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (httpss://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 445 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000675/v-2.1.1", - "usability_domain": [ - "The Fruitfly Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file httpss://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (httpss://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 445 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000535/v-2.0.2", - "usability_domain": [ - "The SARS-CoV2 Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000530. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 444 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000535/v-2.1.1", - "usability_domain": [ - "The SARS-CoV2 Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000530. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 444 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000056/1.0", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.\n\nThis dataset can be used to get list of canonical and isoform proteins with status and the gene names." - ], - "score": { - "usability_domain_length": 443 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000057/1.0", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.\n\nThis dataset can be used to get list of canonical and isoform proteins with status and the gene names." - ], - "score": { - "usability_domain_length": 443 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_013139/1.0", - "usability_domain": [ - "N-glycosylation has been studied in the development and progression of BC. N-glycan abundances were measured using UPLC from interstitial samples and matched serum. TIF, NIF, and serum samples were collected from about 90 women diagnosed with breast cancer and a total of 165 N-glycan groups were identified. TIF-serum N-glycan abundance correlation was analyzed using CAMPP. GP1, GP37, and GP38 were found to have significant correlation." - ], - "score": { - "usability_domain_length": 440 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000483/v-2.0.2", - "usability_domain": [ - "The Human Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 440 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000484/v-2.0.2", - "usability_domain": [ - "The Mouse Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 440 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000483/v-2.1.1", - "usability_domain": [ - "The Human Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 440 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000484/v-2.1.1", - "usability_domain": [ - "The Mouse Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 440 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000034/1.0", - "usability_domain": [ - "Salmonella typhimurium reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000032/1.1", - "usability_domain": [ - "Salmonella typhimurium reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000034/1.1", - "usability_domain": [ - "Salmonella typhimurium reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000001014; Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000485/v-2.0.2", - "usability_domain": [ - "The Rat Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000408/v-2.0.2", - "usability_domain": [ - "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as an animated GIF through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000124/v-2.0.2", - "usability_domain": [ - "The Mouse UniprotKB Xref PRO contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000313/v-2.0.2", - "usability_domain": [ - "The Human Protein Binary Interaction (IntAct) dataset contains human [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000120/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref PDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000314/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Binary Interaction (IntAct) dataset contains mouse [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000120/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref PDB contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000124/v-2.1.1", - "usability_domain": [ - "The Mouse UniprotKB Xref PRO contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - " PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000313/v-2.1.1", - "usability_domain": [ - "The Human Protein Binary Interaction (IntAct) dataset contains human [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000314/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Binary Interaction (IntAct) dataset contains mouse [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000485/v-2.1.1", - "usability_domain": [ - "The Rat Proteoform Citations (GlyConnect) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000329. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID. The dataset is contributed by GlyGen and processed by GlyGen. If you use this dataset please provide proper attribution to GlyConnect (https://glyconnect.expasy.org/)." - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000408/v-2.1.1", - "usability_domain": [ - "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as an animated GIF through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" - ], - "score": { - "usability_domain_length": 438 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000122/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref Pfam contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 437 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000119/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref PDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 437 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000119/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref PDB contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 437 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000122/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref Pfam contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 437 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000013/1.1", - "usability_domain": [ - "HIV1 (HXB2) reference protein accessions and summary annotations. ", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000013/1.2", - "usability_domain": [ - "HIV1 (HXB2) reference protein accessions and summary annotations. ", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000013/1.3", - "usability_domain": [ - "HIV1 (HXB2) reference protein accessions and summary annotations. ", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000002241; Human immunodeficiency virus type 1 group M subtype B (isolate HXB2)). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000121/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref Pfam contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000136/v-2.0.2", - "usability_domain": [ - "The Mouse Transcript Locus (Ensembl Transcript coordinates) contains UniProtKB mouse [taxid:10090] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000121/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref Pfam contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000136/v-2.1.1", - "usability_domain": [ - "The Mouse Transcript Locus (Ensembl Transcript coordinates) contains UniProtKB mouse [taxid:10090] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 436 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000409/v-2.0.2", - "usability_domain": [ - "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as a .mp4 video through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000135/v-2.0.2", - "usability_domain": [ - "The Human Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB human [taxid:9606] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000123/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref PRO contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000245/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref PDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000123/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref PRO contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000135/v-2.1.1", - "usability_domain": [ - "The Human Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB human [taxid:9606] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000245/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref PDB contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000409/v-2.1.1", - "usability_domain": [ - "Based on the RCSB PDB struture 6VSB (https://www.rcsb.org/structure/6vsb), a 3-D model of SARS-CoV-2 (TaxID:2697049) spike glycoprotein is generated by Prof. Dr. Robert Woods's group at the Complex Carbohydrate Research Center, University of Georgia. The 3D model can be downloaded as a .mp4 video through this GlyGen data object. For more information visit GlyGen wiki (https://wiki.glygen.org/index.php/SARS-CoV-2_spike_glycoprotein)" - ], - "score": { - "usability_domain_length": 435 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000446/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref ChEMBL contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 434 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000315/v-2.0.2", - "usability_domain": [ - "The Rat Protein Binary Interaction (IntAct) dataset contains rat [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 434 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000315/v-2.1.1", - "usability_domain": [ - "The Rat Protein Binary Interaction (IntAct) dataset contains rat [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 434 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000446/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) UniprotKB Xref ChEMBL contains sarscov1 [taxid:694009] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 434 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000262/v-2.0.2", - "usability_domain": [ - "The Rat UniprotKB Xref PRO contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 432 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000262/v-2.1.1", - "usability_domain": [ - "The Rat UniprotKB Xref PRO contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 432 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000248/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref Pfam contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000273/v-2.0.2", - "usability_domain": [ - "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to Rat Genome Database (RGD) accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. f you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - "The Rat Genome Database (RGD) is the premier site for genetic, genomic, phenotype, and disease data generated from rat research" - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000237/v-2.0.2", - "usability_domain": [ - "The Rat Ensembl Transcript Locus (Transcript coordinates)s contains UniProtKB rat [taxid:10116] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000049/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for non-small cell lung cancer - This file contains FDA-approved human biomarker tests for non-small cell lung cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000237/v-2.1.1", - "usability_domain": [ - "The Rat Ensembl Transcript Locus (Transcript coordinates)s contains UniProtKB rat [taxid:10116] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end positions. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000248/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref Pfam contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000273/v-2.1.1", - "usability_domain": [ - "The Rat [taxid:10116] UniprotKB Xref RefSeq contains rat UniProtKB canonical accessions cross-referenced to Rat Genome Database (RGD) accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. f you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen.", - "The Rat Genome Database (RGD) is the premier site for genetic, genomic, phenotype, and disease data generated from rat research" - ], - "score": { - "usability_domain_length": 431 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000403/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1b [taxid:11116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 430 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000027/1.0.25", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for non-small cell lung cancer - This file contains FDA-approved human biomarker tests for non-small cell lung cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)" - ], - "score": { - "usability_domain_length": 430 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000403/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1b [taxid:11116] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 430 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000082/v-2.0.2", - "usability_domain": [ - "The Mouse Gene Locus dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the corresponding ensembl gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 429 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000082/v-2.1.1", - "usability_domain": [ - "The Mouse Gene Locus dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the corresponding ensembl gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 429 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000428/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov2 [taxid:2697049] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." - ], - "score": { - "usability_domain_length": 428 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000105/1.0.36", - "usability_domain": [ - "A0409 is the Invader UGT1A1 Molecular Assay is an in vitro diagnostic test for the detection and genotyping of the *1 (TA6) and *28 (TA7) alleles of the UDP glucuronosyltransferase 1A1 (UGT1A1) gene in genomic DNA from whole peripheral blood as an aid in the identification of patients with greater risk for decreased UDP-glucuronosyltransferase activity. [FTCID:K051824]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 428 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000428/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov2 [taxid:2697049] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." - ], - "score": { - "usability_domain_length": 428 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000003/1.0", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 427 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000003/1.1", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 427 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000003/1.2", - "usability_domain": [ - "Influenza A (A/Puerto Rico/8/1934 H1N1) reference protein accessions and summary annotations.", - "Data was retrieved using UniProt proteome ID (UniProt ID: UP000009255; strain A/Puerto Rico/8/1934 H1N1). The primary use case for this data set is to retrieve protein sequences and annotations from UniProt and other sources and also can be used to visualize annotations such as drug resistance mutations, selection pressure, and more" - ], - "score": { - "usability_domain_length": 427 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000267/v-2.0.2", - "usability_domain": [ - "The Rat Gene Locus (Ensembl) contains rat [taxid:10116] UniProtKB canonical accessions mapped to the corresponding ENSEMBL Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 427 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000267/v-2.1.1", - "usability_domain": [ - "The Rat Gene Locus (Ensembl) contains rat [taxid:10116] UniProtKB canonical accessions mapped to the corresponding ENSEMBL Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 427 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000613/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2) Protein Names (NCBI RefSeq) dataset contains sarscov2 [taxid:2697049] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 425 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000613/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2) Protein Names (NCBI RefSeq) dataset contains sarscov2 [taxid:2697049] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 425 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000667/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref OrthoDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 424 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000667/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref OrthoDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OrthoDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " OrthoDB is a comprehensive catalog of orthologs, i.e. descendants from a single gene of the last common ancestor of a specific phylogeny radiation termed level-of-orthology." - ], - "score": { - "usability_domain_length": 424 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000402/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1a [taxid:11108] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 423 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000402/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for hcv1a [taxid:11108] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 423 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000489/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Diseases dataset contains disease caused by sarscov1 virus [taxid:694009] which is SARS and is mapped to all UniProtKB canonical accessions. The dataset contains SARS (Severe Acute Respiratory Syndrome) disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 422 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000489/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Diseases dataset contains disease caused by sarscov1 virus [taxid:694009] which is SARS and is mapped to all UniProtKB canonical accessions. The dataset contains SARS (Severe Acute Respiratory Syndrome) disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 422 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000660/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref CDD contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 421 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000660/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref CDD contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to CDD database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The Conserved Domain Database is a database of well-annotated multiple sequence alignment models and derived database search models, for ancient domains and full-length proteins." - ], - "score": { - "usability_domain_length": 421 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000737/v-2.1.1", - "usability_domain": [ - "The Glycan Biomarkers dataset contains glycan biomarker information, including cross-mapped GlyTouCan accessions, assessed biomarker entity, biomarker type, specimen type, LOINC code, disease name, and source evidence from the biomarker portal (https://data.oncomx.org/allbiomarkers). If you use this dataset, please provide proper attribution to OncoMX and GlyGen. Cite: PMID:32142370, PMID:34015823, and PMID:31616925." - ], - "score": { - "usability_domain_length": 421 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_016981/1.0", - "usability_domain": [ - "Template workflow for the detection of adventitious virus, workflow was built and tested with a titration of synthetic reads and applied to a real dataset. The synthetic reads were generated from a set of 25 viruses sequences that represent the 25 families in the viral zone database and 6 host organisms as background sequences. The pipeline can be used to detect adventitious viral agents in biological manufacturing." - ], - "score": { - "usability_domain_length": 419 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000057/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] nonsynonymous single-nucleotide variations (nsSNVs) with data in cancer samples from TCGA, ICGC, COSMIC, ClinVar, and CIViC - This file contains human [taxid:9606] nonsynonymous single-nucleotide variations (nsSNV) in cancer samples. Variants are mapped to canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." - ], - "score": { - "usability_domain_length": 419 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000007/v-2.0.2", - "usability_domain": [ - "The Mouse Proteome Masterlist dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 414 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000427/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov1 [taxid:694009] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." - ], - "score": { - "usability_domain_length": 414 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000007/v-2.1.1", - "usability_domain": [ - "The Mouse Proteome Masterlist dataset contains mouse [taxid:10090] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 414 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000427/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for sarscov1 [taxid:694009] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2020_01 UniprotKB release." - ], - "score": { - "usability_domain_length": 414 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000043/v-2.0.2", - "usability_domain": [ - "The Mouse Glycosylation Sites (RCSB PDB) dataset contains mouse [taxid:10090] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 413 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000026/1.0.25", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for colorectal cancer - This file contains FDA-approved human biomarker tests for colorectal cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 413 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000047/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for colorectal cancer - This file contains FDA-approved human biomarker tests for colorectal cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 413 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000043/v-2.1.1", - "usability_domain": [ - "The Mouse Glycosylation Sites (RCSB PDB) dataset contains mouse [taxid:10090] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 413 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_022807/1.0", - "usability_domain": [ - "A subtyping model for SARS-CoV-2 based on Informative Subtype Markers (ISMs) defined as variable regions between viral genomes that serve as characteristic regions in the genome. This pipeline creates a multiple sequence alignment and together with the metadata allows for the viral profiling of different subtypes across different geographic locations visualized as pie charts/time series plots and ISM tables. " - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000042/v-2.0.2", - "usability_domain": [ - "The Human Glycosylation Sites (RCSB PDB) dataset contains human [taxid:9606] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.0", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.1", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.2", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.3", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000042/v-2.1.1", - "usability_domain": [ - "The Human Glycosylation Sites (RCSB PDB) dataset contains human [taxid:9606] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.4", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000001/23.5", - "usability_domain": [ - "SARS-CoV-2 Wuhan genome assembly using MEGAHIT", - "Reads are downloaded with fasterq-dump, controlled for quality with fastp, and then mapped to the human genome with BWA-MEM. Any reads flagged as mapped are subsequently filtered out, thereby removing reads associated with the host. Remaining reads are then converted back into FASTQ format and assembled using Megahit. The longest contig is automatically extracted." - ], - "score": { - "usability_domain_length": 412 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000013/27.0", - "usability_domain": [ - "Multiple reference guided assembly with BWA, VAPOR and iVar.", - "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." - ], - "score": { - "usability_domain_length": 411 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000013/27.1", - "usability_domain": [ - "Multiple reference guided assembly with BWA, VAPOR and iVar.", - "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." - ], - "score": { - "usability_domain_length": 411 - } - }, - { - "object_id": "https://biocomputeobject.org/ARG_000013/27.2", - "usability_domain": [ - "Multiple reference guided assembly with BWA, VAPOR and iVar.", - "FASTQs corresponding to an input SRA accession are downloaded. A FASTA of candidate references are expected as input. Reads are ran through fastp for quality control and ran through VAPOR to choose a suitable reference. QC'ed reads are then mapped to the input reference using BWA-mem. The consensus of the resulting mapped reads is called using ivar." - ], - "score": { - "usability_domain_length": 411 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000367/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) (HCV) Protein Site Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000226/v-2.0.2", - "usability_domain": [ - "The Rat Glycosylation Sites (RCSB PDB) dataset contains rat [taxid:11106] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000366/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000030/1.0.25", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for melanoma cancer - This file contains FDA-approved human biomarker tests for melanoma cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000048/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for melanoma cancer - This file contains FDA-approved human biomarker tests for melanoma cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000051/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for prostate cancer - This file contains FDA-approved human biomarker tests for prostate cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000226/v-2.1.1", - "usability_domain": [ - "The Rat Glycosylation Sites (RCSB PDB) dataset contains rat [taxid:11106] proteins glycosites mapped from the associated RCSB PDB Id's using SIFTS database (https://www.ebi.ac.uk/pdbe/docs/sifts/). The glycosylation data (PDB ID, GlycoSite, Chain ID, Sugar (first link)) was retrieved from RCSB group. [www.rcsb.org, Nucleic Acids Research, 28: 235-242.(https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102472/)]." - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000366/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000367/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) (HCV) Protein Site Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 409 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000665/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref KEGG PATHWAY contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 408 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000029/1.0.25", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for prostate cancer - This file contains FDA-approved human biomarker tests for prostate cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)" - ], - "score": { - "usability_domain_length": 408 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000665/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref KEGG PATHWAY contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to KEGG PATHWAY database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "KEGG PATHWAY is a collection of manually drawn pathway maps representing our knowledge on the molecular interaction, reaction and relation networks." - ], - "score": { - "usability_domain_length": 408 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000351/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Sequence Info (UniProtKB) dataset contains hcv1b [taxid:11108] UniProtKB protein sequence information that includes sequence version fasta header for the hcv1b accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000114/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref OMA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000229/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref OMA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes. " - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000420/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000028/1.0.25", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for ovarian cancer - This file contains FDA-approved human biomarker tests for ovarian cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000050/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for ovarian cancer - This file contains FDA-approved human biomarker tests for ovarian cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000114/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref OMA contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000229/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref OMA contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes. " - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000351/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Sequence Info (UniProtKB) dataset contains hcv1b [taxid:11108] UniProtKB protein sequence information that includes sequence version fasta header for the hcv1b accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000420/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 407 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.1", - "usability_domain": [ - "List of SRA IDs and associated data that are in data.argosdb.", - "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 406 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000113/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref OMA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 406 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000113/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref OMA contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 406 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000100/v-2.0.2", - "usability_domain": [ - "The Mouse UniprotKB Xref ChEMBL contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000099/v-2.0.2", - "usability_domain": [ - "The Human UniprotKB Xref ChEMBL contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000289/v-2.0.2", - "usability_domain": [ - "The dataset provides the InChI key and glycan sequences in InChI format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping (https://data.glygen.org/GLYDS000281). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000144). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000042/1.0.36", - "usability_domain": [ - "FDA-approved or cleared nucleic acid-based human biomarker tests for breast cancer - This file contains FDA-approved human biomarker tests for breast cancer. Each row represents one gene linked to its respective test. Genes are labeled by relevant identifiers/accessions from UniProtKB, HGNC, and EDRN. Tests are distinguished by manufacturer, FDA submission ID(s), clinical trial ID(s), and PubMed ID(s)." - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000099/v-2.1.1", - "usability_domain": [ - "The Human UniprotKB Xref ChEMBL contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000100/v-2.1.1", - "usability_domain": [ - "The Mouse UniprotKB Xref ChEMBL contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 405 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000464/v-2.0.2", - "usability_domain": [ - "The GlyGen-Pharos Cross-references Mapping dataset contains GlyGen's human [taxid:9606] UniProtKB accessions and corresponding GlyGen URLs. The dataset is derived from 2019-09 UniProtKB release.", - "The dataset is created for Pharos database to crosslink their protein page links to GlyGen protein pages.", - "The log file contains the Pharos protein accessions that could not be mapped to GlyGen protein accessions" - ], - "score": { - "usability_domain_length": 404 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000227/v-2.0.2", - "usability_domain": [ - "The Rat UniprotKB Xref ChEMBL contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties. " - ], - "score": { - "usability_domain_length": 404 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000227/v-2.1.1", - "usability_domain": [ - "The Rat UniprotKB Xref ChEMBL contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. The dataset is derived from 2019-05 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties. " - ], - "score": { - "usability_domain_length": 404 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000464/v-2.1.1", - "usability_domain": [ - "The GlyGen-Pharos Cross-references Mapping dataset contains GlyGen's human [taxid:9606] UniProtKB accessions and corresponding GlyGen URLs. The dataset is derived from 2019-09 UniProtKB release.", - "The dataset is created for Pharos database to crosslink their protein page links to GlyGen protein pages.", - "The log file contains the Pharos protein accessions that could not be mapped to GlyGen protein accessions" - ], - "score": { - "usability_domain_length": 404 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000008/1.5", - "usability_domain": [ - "Assembly metadata extracted from NCBI from the Original FDA-ARGOS BioProject. ", - "This table includes metadata from the FDA-ARGOS BioProject (PRJNA231221) with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the project and have been deposited into NCBI BioProject and SRA." - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000364/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein PTM Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000472/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein PTM Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000475/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Phosphorylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000342/v-2.0.2", - "usability_domain": [ - "The dataset provides glycans that have at least one \"NeuAc\" residue in the current GlyGen dataset (https://data.glygen.org/GLYDS000281). The GlyTouCan accessions included in this list have mammalian species annotation (human/mouse/rat) through direct TaxID annotation and/or inferred via subsumption. This table was customized based on a GlyGen user query submitted through GlyGen help on Feb 17th 2020." - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000342/v-2.1.1", - "usability_domain": [ - "The dataset provides glycans that have at least one \"NeuAc\" residue in the current GlyGen dataset (https://data.glygen.org/GLYDS000281). The GlyTouCan accessions included in this list have mammalian species annotation (human/mouse/rat) through direct TaxID annotation and/or inferred via subsumption. This table was customized based on a GlyGen user query submitted through GlyGen help on Feb 17th 2020." - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000364/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein PTM Annotation (UniProtKB) dataset contains hcv1b [taxid:11116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000472/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein PTM Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000475/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Phosphorylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 403 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000022/1.0", - "usability_domain": [ - "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000022/1.1", - "usability_domain": [ - "Illumina metatranscriptomic reads containing a target virus of interest are assumed as input. Reads are first ran through quality control using fastp. They are then mapped to a host (human) genome and any reads that map sufficiently are removed. The resulting reads are then ran through the de novo assembler SPAdes. Finally, the assembled contigs are ran through QUAST for associated assembly metrics." - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000385/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Phosphorylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000365/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000365/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Enzyme Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000385/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Phosphorylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 402 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000479/v-2.0.2", - "usability_domain": [ - "List of SARS coronavirus (SARS-CoV-2 or 2019-nCoV) [taxid:2697049] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000434)" - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000444/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Site Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000490/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Diseases dataset contains disease caused by sarscov2 virus [taxid:2697049] which is COVID-19 mapped to all UniProtKB canonical accessions. The dataset contains COVID-19 disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000444/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Site Annotation (UniProtKB) dataset contains sarscov2 [taxid:2697049] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000479/v-2.1.1", - "usability_domain": [ - "List of SARS coronavirus (SARS-CoV-2 or 2019-nCoV) [taxid:2697049] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000434)" - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000490/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Diseases dataset contains disease caused by sarscov2 virus [taxid:2697049] which is COVID-19 mapped to all UniProtKB canonical accessions. The dataset contains COVID-19 disease ontology ID and GlyGen dataset ID. The input file for this dataset is compiled by GlyGen. If you use this dataset please give proper attribution to UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 401 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000641/v-2.0.2", - "usability_domain": [ - "The Fruitfly Gene Names (UniProtKB) dataset contains UniProtKB gene Names/names for fruitfly [taxid:7227] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 400 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000641/v-2.1.1", - "usability_domain": [ - "The Fruitfly Gene Names (UniProtKB) dataset contains UniProtKB gene Names/names for fruitfly [taxid:7227] proteins. It contains the recommended gene symbol, alternative gene symbol and ORF gene names assigned by the Gene Name Nomenclature commitees. (for more info - https://www.uniprot.org/help/gene_name). If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 400 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000494/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Site Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 398 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.4", - "usability_domain": [ - "ngs ID List, Selection Criteria and Key Table ", - "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 398 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000494/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Site Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 398 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000735/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref O-GlcNAcAtlas dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAcAtlas database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAcAtlas and GlyGen. O-GlcNAcAtlas is a a rigorously curated database for experimentally identified O-GlcNAc sites/proteins (https://oglcnac.org/atlas/)" - ], - "score": { - "usability_domain_length": 398 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.5", - "usability_domain": [ - "ngs ID List, Selection Criteria and Key Table ", - "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 398 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.6", - "usability_domain": [ - "ngs ID List, Selection Criteria and Key Table", - "This dataset was created to compile all SRA IDs that have been added to data.ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 397 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000393/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Names (NCBI RefSeq) dataset contains mouse [taxid:10090] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 396 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000363/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein PTM Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 396 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.3", - "usability_domain": [ - "ngs ID List, Selection Criteria and Master Table ", - "This dataset was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 396 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000363/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein PTM Annotation (UniProtKB) dataset contains hcv1a [taxid:11108] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 396 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000393/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Names (NCBI RefSeq) dataset contains mouse [taxid:10090] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 396 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000392/v-2.0.2", - "usability_domain": [ - "The Human Protein Names (NCBI RefSeq) dataset contains human [taxid:9606] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 395 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000384/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Phosphorylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 395 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000384/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Phosphorylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 395 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000392/v-2.1.1", - "usability_domain": [ - "The Human Protein Names (NCBI RefSeq) dataset contains human [taxid:9606] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 395 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000125/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref Reactome contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000126/v-2.0.2", - "usability_domain": [ - "TheMouse UniProtKB Xref Reactome contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database." - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000260/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref Reactome contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.2", - "usability_domain": [ - "ngs ID List, Selection Criteria and Master Table ", - "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000125/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref Reactome contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000126/v-2.1.1", - "usability_domain": [ - "TheMouse UniProtKB Xref Reactome contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database." - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000260/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref Reactome contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 394 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000419/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 393 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000419/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Enzyme Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 393 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000018/1.0", - "usability_domain": [ - "List of SRA ids that are in beta-data.argosdb.", - "This sheet was created to compile all SRA ids that have been added to ARGOSdb. These IDs were selected based on a variety of criteria listed in this table. Primary use of this data is to better understand how NGS files are selected for QC and also provide an opportunity for the users to see the list and recommend additional IDs to be analyzed." - ], - "score": { - "usability_domain_length": 391 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000736/v-2.1.1", - "usability_domain": [ - "The GlyGen-iPTMnet Crossreferences dataset contains UniProtKB glycoprotein accessions for human [taxid:9606], mouse [taxid:10090], rat [taxid:10116], fruitfly [taxid:7227], hcv1a [taxid:11108], hcv1b [taxid:11116], sarscov1 [taxid:694009] and sarscov2 [taxid:2697049] glycoproteins for the purpose of mapping UniProtKB glycosylation site info to PMID and GlyGen glycosylation site view URLs." - ], - "score": { - "usability_domain_length": 391 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000474/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Phosphorylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000615/v-2.0.2", - "usability_domain": [ - "The The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) NCBI Protein Linkouts contains sarscov2 [taxid:2697049] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000691/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Binary Interaction (IntAct) dataset contains fruitfly [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000669/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref PDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000471/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein PTM Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000471/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein PTM Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2020-01 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000474/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Phosphorylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000615/v-2.1.1", - "usability_domain": [ - "The The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) NCBI Protein Linkouts contains sarscov2 [taxid:2697049] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000669/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref PDB contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PDB database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "The Protein Data Bank is a database for the three-dimensional structural data of large biological molecules, such as proteins and nucleic acids. " - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000691/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Binary Interaction (IntAct) dataset contains fruitfly [taxid:10116] binary interaction data for UniProtKB canonical accession from IntAct database. The data is quality filtered by UniProt from IntAct and contains information about the interactors, number of experiments, IntAct Ids etc. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 389 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000670/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Pfam contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 388 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000670/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Pfam contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Pfam database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Pfam is a database of protein families that includes their annotations and multiple sequence alignments generated using hidden Markov models. " - ], - "score": { - "usability_domain_length": 388 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000465/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Site Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 387 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000671/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniprotKB Xref PRO contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 387 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000465/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Site Annotation (UniProtKB) dataset contains sarscov1 [taxid:694009] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 387 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000671/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniprotKB Xref PRO contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to PRO database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "PRO provides an ontological representation of protein-related entities by explicitly defining them and showing the relationships between them. " - ], - "score": { - "usability_domain_length": 387 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000656/v-2.0.2", - "usability_domain": [ - "The Fruitfly Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB fruitfly [taxid:7227] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 386 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000656/v-2.1.1", - "usability_domain": [ - "The Fruitfly Transcript Locus (Ensembl Transcript coordinates)s contains UniProtKB fruitfly [taxid:7227] canonical accessions mapped to their isoforms with further mapping to the corresponding ENSEMBL Transcript ID, Peptide ID, chromosome ID along with the transcripts' start and end position. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 386 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000473/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Glycosylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000383/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Glycosylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000239/v-2.0.2", - "usability_domain": [ - "The Human N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for human [taxid:9606] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000554/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000239/v-2.1.1", - "usability_domain": [ - "The Human N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for human [taxid:9606] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. The dataset is derived form 2019-09 UniprotKB release." - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000383/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Glycosylation Sites (UniProtKB) dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000473/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Glycosylation Sites (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000554/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) UniProtKB Xref NCBI Gene dataset contains sarscov2 [taxid:2697049] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 385 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000434/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Proteome Master list dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 384 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000434/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Proteome Master list dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen." - ], - "score": { - "usability_domain_length": 384 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000345/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Proteome Materlist dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 383 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000032/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] differentially expressed genes or miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed genes in cancer samples. Genes are mapped to RefSeq transcripts and canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." - ], - "score": { - "usability_domain_length": 383 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000022/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] differentially expressed genes or miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed genes in cancer samples. Genes are mapped to RefSeq transcripts and canonical UniProtKB/Swiss-Prot AC, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." - ], - "score": { - "usability_domain_length": 383 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000345/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Proteome Materlist dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 383 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000538/v-2.1.1", - "usability_domain": [ - "The Glycan Citations (NCFG) dataset contains publication information for a library of complex multiantennary Asn-linked N-glycans generated by chemo-enzymatic synthesis, including PMID, journal name, date, author and title. The dataset is derived from the dataset contributed by the Richard Cummings Laboratory (NCFG BIDMC Harvard Medical School) http://data.glygen.org/GLY_000600." - ], - "score": { - "usability_domain_length": 381 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000742/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 381 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000382/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Glycosylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 378 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000235/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Citations (RefSeq) datasets contains mouse [taxid:10090] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 378 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000235/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Citations (RefSeq) datasets contains mouse [taxid:10090] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 378 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000382/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Glycosylation Sites (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 378 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_023769/1.0", - "usability_domain": [ - "This curated test computation evaluates the performance of Heptagon, a tool that performs base and SNP-calling for a previously computed alignment and provides quality and noise assessment profiles. ", - "Heptagon was used to identify SNPs from the previous Hexagon alignment of Whole Exome Sequencing of lung squamous carcinoma (SQCC) patients against human reference genome GRCh38." - ], - "score": { - "usability_domain_length": 377 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_025006/1.0", - "usability_domain": [ - "Full-length genome sequence of segmented RNA virus from ticks was obtained using small RNA sequencing data. this is the first study in which 5\u2032 and 3\u2032 sRNAs were used to generate full-length genome sequences of, but not limited to, RNA viruses. The workflow demonstrates the feasibility of using the sRNA-seq based methods for the detection of viruses in pooled/indiviudal RNA " - ], - "score": { - "usability_domain_length": 377 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000256/v-2.0.2", - "usability_domain": [ - "The Human Protein Citations (RefSeq) datasets contains human [taxid:9606] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 377 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000108/1.0.36", - "usability_domain": [ - "A0412 is the Vysis ALK Break Apart FISH Probe Kit is a qualitative test to detect rearrangements involving the ALK gene via fluorescence in situ hybridization (FISH) in formalin-fixed paraffin-embedded (FFPE) non- small cell lung cancer (NSCLC) tissue specimens ... The test is for prescription use only. [FTCID:P110012]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 377 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000256/v-2.1.1", - "usability_domain": [ - "The Human Protein Citations (RefSeq) datasets contains human [taxid:9606] UniProtKB accessions mapped to publication information (PMID, title, journal name, publication, date, authors of corresponding RefSeq accessions). The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 377 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000319/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Reactions (Reactome) dataset contains reactions information for pathways for mouse [taxid:10090] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 376 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000029/v-2.0.2", - "usability_domain": [ - "Gene Expression (Cancer) dataset contains the gene expression information in cancers from BioXpress database. The GlyGen interface shows the gene expression (cancer) information for select cancer types/DOIDs. For more cancer types/DOIDs please refer - https://hive.biochemistry.gwu.edu/bioxpress. If you use this dataset please give proper attribution to BioXpress and GlyGen." - ], - "score": { - "usability_domain_length": 376 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000029/v-2.1.1", - "usability_domain": [ - "Gene Expression (Cancer) dataset contains the gene expression information in cancers from BioXpress database. The GlyGen interface shows the gene expression (cancer) information for select cancer types/DOIDs. For more cancer types/DOIDs please refer - https://hive.biochemistry.gwu.edu/bioxpress. If you use this dataset please give proper attribution to BioXpress and GlyGen." - ], - "score": { - "usability_domain_length": 376 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000319/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Reactions (Reactome) dataset contains reactions information for pathways for mouse [taxid:10090] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 376 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000310/v-2.0.2", - "usability_domain": [ - "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 375 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000320/v-2.0.2", - "usability_domain": [ - "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 375 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000310/v-2.1.1", - "usability_domain": [ - "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 375 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000320/v-2.1.1", - "usability_domain": [ - "The Human Protein Reactions (Reactome) dataset contains reactions information for pathways for human [taxid:9606] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 375 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000466/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Information dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 373 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000466/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Information dataset contains hcv1b [taxid:11116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 373 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000321/v-2.0.2", - "usability_domain": [ - "The Rat Protein Reactions (Reactome) dataset contains reactions information for pathways for rat [taxid:10116] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 372 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000688/v-2.0.2", - "usability_domain": [ - "\"The Fruitfly Protein Reaction Participants (Reactome) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen\"" - ], - "score": { - "usability_domain_length": 372 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000321/v-2.1.1", - "usability_domain": [ - "The Rat Protein Reactions (Reactome) dataset contains reactions information for pathways for rat [taxid:10116] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 372 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000688/v-2.1.1", - "usability_domain": [ - "\"The Fruitfly Protein Reaction Participants (Reactome) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen\"" - ], - "score": { - "usability_domain_length": 372 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000433/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Information dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000495/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Glycosylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000264/v-2.0.2", - "usability_domain": [ - "The Rat Protein Citations (RefSeq) datasets contains rat [taxid:10116] UniProtKB accessions mapped to publication information (pmid, title, journal name, publication, date, authors of corresponding RefSeq accessions. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000556/v-2.0.2", - "usability_domain": [ - "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlycoMotif (https://glycomotif.glyomics.org/glycomotif/GlycoMotif). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281) and the associated PMIDs is derived from file Glycan Motif(https://data.glygen.org/GLY_000283)." - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000638/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Enzyme Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000614/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) NCBI Protein Linkouts contains sarscov1 [taxid:694009] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000467/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Proteome Master list dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000264/v-2.1.1", - "usability_domain": [ - "The Rat Protein Citations (RefSeq) datasets contains rat [taxid:10116] UniProtKB accessions mapped to publication information (pmid, title, journal name, publication, date, authors of corresponding RefSeq accessions. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000433/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Information dataset contains sarscov2 [taxid:2697049] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000467/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Proteome Master list dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000495/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Glycosylation Sites (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000614/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) NCBI Protein Linkouts contains sarscov1 [taxid:694009] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000638/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Enzyme Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 371 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000435/0.1", - "usability_domain": [ - "For this data set, we pulled biomarker data from OncoMX. From the raw TSV, the data was cleaned and panel biomarkers were filtered out. Then, temporary ID's were assigned based on the core fields (assessed_biomarker_entity_id, assessed_biomarker_entity, biomarker, and condition). The primary use case for this data set is to see information on human cancer biomarkers. " - ], - "score": { - "usability_domain_length": 370 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000012/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Canonical (UniProtKB) Sequences dataset contains mouse [taxid:10090] protein canonical fasta sequences from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 369 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000012/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Canonical (UniProtKB) Sequences dataset contains mouse [taxid:10090] protein canonical fasta sequences from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 369 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000354/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Information dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 366 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000354/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Information dataset contains hcv1a [taxid:11108] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 366 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000323/v-2.0.2", - "usability_domain": [ - "The Human Protein Enzyme Annotation (UniProtKB) dataset contains human [taxid:9606] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 365 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000324/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Enzyme Annotation (UniProtKB) dataset contains mouse [taxid:10090] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 365 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000323/v-2.1.1", - "usability_domain": [ - "The Human Protein Enzyme Annotation (UniProtKB) dataset contains human [taxid:9606] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 365 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000324/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Enzyme Annotation (UniProtKB) dataset contains mouse [taxid:10090] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 365 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000647/v-2.0.2", - "usability_domain": [ - "The Fruitfly Proteome Masterlist dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 364 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000647/v-2.1.1", - "usability_domain": [ - "The Fruitfly Proteome Masterlist dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is contributed by EMBL-EBI-UniProtKB and processed by GlyGen. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 364 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000041/v-2.0.2", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000007)." - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000341/v-2.0.2", - "usability_domain": [ - "The dataset provides species annotation for the associated glycan (GlyTouCan Accession) generated from direct TaxID annotation and/or inferred via subsumption. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.ncbi.nlm.nih.gov/taxonomy." - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000268/v-2.0.2", - "usability_domain": [ - "The Human Tyr O-Linked Glycosylation Sites dataset contains human [taxid:9606] UniProtKB accessions that are Tyr O-Linked Glycosylated. The script that processes this dataset matches the amino acid and position with the latest release fasta file as a QC check to eliminate the incorrect entries. If you use this dataset please provide proper attribution to GlyGen" - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000041/v-2.1.1", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000007)." - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000268/v-2.1.1", - "usability_domain": [ - "The Human Tyr O-Linked Glycosylation Sites dataset contains human [taxid:9606] UniProtKB accessions that are Tyr O-Linked Glycosylated. The script that processes this dataset matches the amino acid and position with the latest release fasta file as a QC check to eliminate the incorrect entries. If you use this dataset please provide proper attribution to GlyGen" - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000341/v-2.1.1", - "usability_domain": [ - "The dataset provides species annotation for the associated glycan (GlyTouCan Accession) generated from direct TaxID annotation and/or inferred via subsumption. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.ncbi.nlm.nih.gov/taxonomy." - ], - "score": { - "usability_domain_length": 363 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000356/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Citations dataset contains publication information for hcv1b [taxid:11116] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000640/v-2.0.2", - "usability_domain": [ - "The Fruitfly Gene Locus (Ensembl) contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000441/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Sequence Info (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000040/v-2.0.2", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)." - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000673/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref O-GlcNAc (MCW) contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAc (MCW) database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAc (MCW) and GlyGen. O - GlcNAc(MCW) is a database for O - GlcNAc glycosylation information. https://www.oglcnac.mcw.edu/" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000291/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in SMILES Isomeric format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000040/v-2.1.1", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)." - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000291/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in SMILES Isomeric format for the associated glycans (GlyTouCan Accession) based on the GlyTouCan to PubChem CID mapping. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://pubchem.ncbi.nlm.nih.gov/; https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000356/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Citations dataset contains publication information for hcv1b [taxid:11116] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000441/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Sequence Info (UniProtKB) dataset contains sarscov2 [taxid:2697049] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000640/v-2.1.1", - "usability_domain": [ - "The Fruitfly Gene Locus (Ensembl) contains fruitfly [taxid:7227] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000673/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref O-GlcNAc (MCW) contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to O-GlcNAc (MCW) database accessions/identifiers. If you use this dataset please provide proper attribution to O-GlcNAc (MCW) and GlyGen. O - GlcNAc(MCW) is a database for O - GlcNAc glycosylation information. https://www.oglcnac.mcw.edu/" - ], - "score": { - "usability_domain_length": 362 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000325/v-2.0.2", - "usability_domain": [ - "The Rat Protein Enzyme Annotation (UniProtKB) dataset contains rat [taxid:10116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000349/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Isoform sequences (UniProtKB) dataset contains hcv1b [taxid:11116] protein fasta sequences for the hcv1b isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000317/v-2.0.2", - "usability_domain": [ - "The Mouse Protein PTM Annotation (UniProtKB) dataset contains mouse [taxid:10090] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000254/v-2.0.2", - "usability_domain": [ - "The Mouse Phosphorylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000221/v-2.0.2", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000244)." - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000412/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Isoform sequences (UniProtKB) dataset contains sarscov2 [taxid:2697049] protein fasta sequences for the sarscov2 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000221/v-2.1.1", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites and associated glycans (if available) from UniCarbKB database [https://academic.oup.com/nar/article/42/D1/D215/1052197 ,https://doi.org/10.1093/nar/gkt1128]. The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000244)." - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000254/v-2.1.1", - "usability_domain": [ - "The Mouse Phosphorylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000317/v-2.1.1", - "usability_domain": [ - "The Mouse Protein PTM Annotation (UniProtKB) dataset contains mouse [taxid:10090] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000325/v-2.1.1", - "usability_domain": [ - "The Rat Protein Enzyme Annotation (UniProtKB) dataset contains rat [taxid:10116] enzyme annotations for UniProtKB canonical accessions. The dataset contains ec numbers and enzyme activity that describes the enzymatic reactions. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000349/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Isoform sequences (UniProtKB) dataset contains hcv1b [taxid:11116] protein fasta sequences for the hcv1b isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000412/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Isoform sequences (UniProtKB) dataset contains sarscov2 [taxid:2697049] protein fasta sequences for the sarscov2 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 361 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000470/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Citations dataset contains publication information for sarscov2 [taxid:2697049] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 360 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000353/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) EBI-UniProtKB NT file contains proteome data for hcv1b [taxid:11116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 360 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000087/1.0.36", - "usability_domain": [ - "A0391 is a biomarker panel of identified gene and protein alterations in colorectal cancer (DOID:9256) to screen adults of either sex, 50 years or older, who are at typical average risk for CRC. The genes are KRAS (UPKB:P01116), BMP3 (UPKB:P12645), NDRG4 (UPKB:Q9ULP0), and the hemoglobin (HBB) protein. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 360 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000353/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) EBI-UniProtKB NT file contains proteome data for hcv1b [taxid:11116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 360 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000470/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Citations dataset contains publication information for sarscov2 [taxid:2697049] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 360 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000033/1.2", - "usability_domain": [ - "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference proteome fasta sequences.", - "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 359 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000033/1.0", - "usability_domain": [ - "Salmonella typhimurium (strain LT2 / SGSC1412 / ATCC 700720) reference proteome fasta sequences.", - "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 359 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000327/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Site Annotation (UniProtKB) dataset contains mouse [taxid:10090] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 359 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000327/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Site Annotation (UniProtKB) dataset contains mouse [taxid:10090] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 359 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000316/v-2.0.2", - "usability_domain": [ - "The Human Protein PTM Annotation (UniProtKB) dataset contains human [taxid:9606] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000322/v-2.0.2", - "usability_domain": [ - "The Human Protein Site Annotation (UniProtKB) dataset contains human [taxid:9606] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000689/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Reaction Participants (Rhea) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000666/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref OMA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000436/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) EBI-UniProtKB NT file contains proteome data for sarscov2 [taxid:2697049] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000316/v-2.1.1", - "usability_domain": [ - "The Human Protein PTM Annotation (UniProtKB) dataset contains human [taxid:9606] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000322/v-2.1.1", - "usability_domain": [ - "The Human Protein Site Annotation (UniProtKB) dataset contains human [taxid:9606] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000436/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) EBI-UniProtKB NT file contains proteome data for sarscov2 [taxid:2697049] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000666/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref OMA contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to OMA database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "The OMA (Orthologous MAtrix) project is a method and database for the inference of orthologs among complete genomes." - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000689/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Reaction Participants (Rhea) dataset contains reactions participants information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction IDs, participant ID, participant name, role, xref ID etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen" - ], - "score": { - "usability_domain_length": 358 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000432/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Information dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000274/v-2.0.2", - "usability_domain": [ - "The Human Phosphorylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000231/v-2.0.2", - "usability_domain": [ - "The Rat Phosphorylation Sites (UniProtKB) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000305/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://pubchem.ncbi.nlm.nih.gov" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000645/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Information (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000661/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniprotKB Xref ChEMBL contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000231/v-2.1.1", - "usability_domain": [ - "The Rat Phosphorylation Sites (UniProtKB) dataset contains rat [taxid:10116] UniProtKB canonical accessions with information on reported and predicted phosphorylation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000274/v-2.1.1", - "usability_domain": [ - "The Human Phosphorylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000305/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://pubchem.ncbi.nlm.nih.gov" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000432/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Information dataset contains sarscov1 [taxid:694009] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000645/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Information (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000661/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniprotKB Xref ChEMBL contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to ChEMBL database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - " ChEMBL or ChEMBLdb is a manually curated chemical database of bioactive molecules with drug-like properties." - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000556/v-2.1.1", - "usability_domain": [ - "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlycoMotif (https://glycomotif.glyomics.org/). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281) and the associated PMIDs are derived from the Glycan Motif dataset (https://data.glygen.org/GLY_000283)." - ], - "score": { - "usability_domain_length": 357 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000081/v-2.0.2", - "usability_domain": [ - "The Human Gene Locus (Ensembl) contains human [taxid:9606] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 356 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000081/v-2.1.1", - "usability_domain": [ - "The Human Gene Locus (Ensembl) contains human [taxid:9606] UniProtKB canonical accessions mapped to the corresponding ensembl Gene ID, chromosome ID along with the gene's start and end position on the chromosome. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 356 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000318/v-2.0.2", - "usability_domain": [ - "The Rat Protein PTM Annotation (UniProtKB) dataset contains rat [taxid:10116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 355 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000328/v-2.0.2", - "usability_domain": [ - "The Rat Protein Site Annotation (UniProtKB) dataset contains rat [taxid:10116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 355 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000318/v-2.1.1", - "usability_domain": [ - "The Rat Protein PTM Annotation (UniProtKB) dataset contains rat [taxid:10116] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 355 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000328/v-2.1.1", - "usability_domain": [ - "The Rat Protein Site Annotation (UniProtKB) dataset contains rat [taxid:10116] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. The dataset is derived form 2019-09 UniprotKB release. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 355 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000348/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Isoform sequences (UniProtKB) dataset contains hcv1a [taxid:11108] protein fasta sequences for the hcv1a isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 354 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000352/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) EBI-UniProtKB NT file contains proteome data for hcv1a [taxid:11108] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 354 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000348/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Isoform sequences (UniProtKB) dataset contains hcv1a [taxid:11108] protein fasta sequences for the hcv1a isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 354 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000352/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) EBI-UniProtKB NT file contains proteome data for hcv1a [taxid:11108] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 354 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000437/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Names (NCBI RefSeq) dataset contains sarscov1 [taxid:694009] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 353 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000437/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Names (NCBI RefSeq) dataset contains sarscov1 [taxid:694009] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 353 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000139/1.0", - "usability_domain": [ - "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000139/1.0", - "usability_domain": [ - "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000139/1.0", - "usability_domain": [ - "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000139/1.0", - "usability_domain": [ - "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000139/1.0", - "usability_domain": [ - "Accurately measure copies of all six DUF1220 clades and provide gene specific resolution of these clades. This allows one to discriminate among the ~300 haploid human DUF1220 copies to an extent not possible with any other method. The result is a greatly enhanced capability to analyze the role that these sequences play in human variation and disease." - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000362/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Gene Ontology (GO) dataset contains hcv1b [taxid:11116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000350/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Sequences Info (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB protein sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000350/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Sequences Info (UniProtKB) dataset contains hcv1a [taxid:11108] UniProtKB protein sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000362/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Gene Ontology (GO) dataset contains hcv1b [taxid:11116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 352 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000022/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Information (NCBI RefSeq) dataset contains mouse [taxid:10090] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000692/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references status to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). The dataset allows to know whether a given GlyGen GlyTouCan has PubChem mapping or not." - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000021/v-2.0.2", - "usability_domain": [ - "The Human Protein Information (NCBI RefSeq) dataset contains human [taxid:9606] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000021/v-2.1.1", - "usability_domain": [ - "The Human Protein Information (NCBI RefSeq) dataset contains human [taxid:9606] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000022/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Information (NCBI RefSeq) dataset contains mouse [taxid:10090] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000692/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references status to PubChem compound (CID) and substance (SID) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). The dataset allows to know whether a given GlyGen GlyTouCan has PubChem mapping or not." - ], - "score": { - "usability_domain_length": 351 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000430/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Ontology (GO) dataset contains sarscov2 [taxid:2697049] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 350 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000430/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Gene Ontology (GO) dataset contains sarscov2 [taxid:2697049] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 350 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000021/1.1", - "usability_domain": [ - "List of ngsQC data from Pond Lab", - "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using Pond Lab's NGS QC workflows constructed in Galaxy. The primary use case for this data set is to explore ngsQC results." - ], - "score": { - "usability_domain_length": 349 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000622/v-2.0.2", - "usability_domain": [ - "This dataset contains NCBI PubMed IDs associated with all of GlyGen's proteins and glycans with their corresponding GlyGen publication detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID. The dataset is for all GlyGen's organisms and the output file is split into three files." - ], - "score": { - "usability_domain_length": 348 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000440/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Sequence Info (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 348 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000440/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Sequence Info (UniProtKB) dataset contains sarscov1 [taxid:694009] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 348 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000622/v-2.1.1", - "usability_domain": [ - "This dataset contains NCBI PubMed IDs associated with all of GlyGen's proteins and glycans with their corresponding GlyGen publication detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID. The dataset is for all GlyGen's organisms and the output file is split into three files." - ], - "score": { - "usability_domain_length": 348 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000275/v-2.0.2", - "usability_domain": [ - "The Rat Protein Information (NCBI RefSeq) dataset contains rat [taxid:10116] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000411/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Isoform sequences (UniProtKB) dataset contains sarscov1 [taxid:694009] protein fasta sequences for the sarscov1 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000469/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1b [taxid:11116] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000275/v-2.1.1", - "usability_domain": [ - "The Rat Protein Information (NCBI RefSeq) dataset contains rat [taxid:10116] protein information such as protein accession, name, length and protein function summary from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000411/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Isoform sequences (UniProtKB) dataset contains sarscov1 [taxid:694009] protein fasta sequences for the sarscov1 isoform accessions from the UniProtKB database. The dataset is derived from 2020_01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000469/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1b [taxid:11116] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 347 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000418/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Citations dataset contains publication information for sarscov1 [taxid:694009] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 346 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000672/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Reactome contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 346 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000418/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Citations dataset contains publication information for sarscov1 [taxid:694009] UniProtKB accessions that includes PMID, journal name, date and author and title. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 346 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000672/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Reactome contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Reactome database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 346 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000422/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov2 [taxid:2697049] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 345 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000361/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Gene Ontology (GO) dataset contains hcv1a [taxid:11108] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 345 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000361/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Gene Ontology (GO) dataset contains hcv1a [taxid:11108] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 345 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000422/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov2 [taxid:2697049] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 345 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000035/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", - "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 344 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000030/1.0", - "usability_domain": [ - "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", - "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 344 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000030/1.2", - "usability_domain": [ - "Lake Victoria marburgvirus (strain musoke-80) reference proteome fasta sequences.", - "This set was chosen based on UniProt curation emphasis and community use. The primary use case for this data set is to use the protein accessions and amino acid coordinates to refer to annotations related to drug resistance mutations, selection pressure and more." - ], - "score": { - "usability_domain_length": 344 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000435/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) EBI-UniProtKB NT file contains proteome data for sarscov1 [taxid:694009] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 344 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000435/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) EBI-UniProtKB NT file contains proteome data for sarscov1 [taxid:694009] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 344 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000406/v-2.0.2", - "usability_domain": [ - "The Mouse NCBI Protein Linkouts contains mouse [taxid:10090] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000523/v-2.0.2", - "usability_domain": [ - "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000281/v-2.0.2", - "usability_domain": [ - "The dataset provides information on the glycan properties such as mass, permethylated mass, topology, base composition, composition and the number of monosaccharides for the associated glycan (GlyTouCan Accession). This dataset is the master-list of the GlyTouCan accessions currently present in GlyGen. Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000281/v-2.1.1", - "usability_domain": [ - "The dataset provides information on the glycan properties such as mass, permethylated mass, topology, base composition, composition and the number of monosaccharides for the associated glycan (GlyTouCan Accession). This dataset is the master-list of the GlyTouCan accessions currently present in GlyGen. Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000406/v-2.1.1", - "usability_domain": [ - "The Mouse NCBI Protein Linkouts contains mouse [taxid:10090] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000523/v-2.1.1", - "usability_domain": [ - "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 343 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000405/v-2.0.2", - "usability_domain": [ - "The Human NCBI Protein Linkouts contains human [taxid:9606] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 342 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000405/v-2.1.1", - "usability_domain": [ - "The Human NCBI Protein Linkouts contains human [taxid:9606] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 342 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000224/v-2.0.2", - "usability_domain": [ - "The Rat Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 341 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000039/v-2.0.2", - "usability_domain": [ - "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 341 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000039/v-2.1.1", - "usability_domain": [ - "The Mouse Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 341 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000224/v-2.1.1", - "usability_domain": [ - "The Rat Glycosylation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 341 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000038/v-2.0.2", - "usability_domain": [ - "The Human Glycosylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 340 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000468/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1a [taxid:11108] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 340 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000038/v-2.1.1", - "usability_domain": [ - "The Human Glycosylation Sites (UniProtKB) dataset contains human [taxid:9606] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 340 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000468/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Function (UniProtKB) dataset contains biological function annotation for hcv1a [taxid:11108] protein accesions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 340 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000347/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Canonical (UniProtKB) Sequences dataset contains hcv1b [taxid:11116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000407/v-2.0.2", - "usability_domain": [ - "The Rat NCBI Protein Linkouts contains rat [taxid:10116] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000244/v-2.0.2", - "usability_domain": [ - "The Rat Proteome Master list dataset contains rat [taxid:10116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000096/1.0.36", - "usability_domain": [ - "A0400 is a panel of identified protein prognostic biomarkers (differential expression) and a gene prognostic biomarker (mutation) in colorectal cancer. The proteins are MLH1 (UPKB:P40692), MSH2 (UPKB:P43246), MSH6 (UPKB:P52701), PMS2 (UPKB:P54278) and the gene is BRAF (UPKB:P15056). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000244/v-2.1.1", - "usability_domain": [ - "The Rat Proteome Master list dataset contains rat [taxid:10116] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000347/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Canonical (UniProtKB) Sequences dataset contains hcv1b [taxid:11116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000407/v-2.1.1", - "usability_domain": [ - "The Rat NCBI Protein Linkouts contains rat [taxid:10116] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID.", - "The RefSeq accessions are The dataset is derived from NCBI RefSeq Release 96, September 9, 2019" - ], - "score": { - "usability_domain_length": 339 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000001/v-2.0.2", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000460/v-2.0.2", - "usability_domain": [ - "The Mouse Germline Mutation dataset contains mouse [taxid:10090] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000001/2.0.3", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000001/2.1.1", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000001/v-2.1.1", - "usability_domain": [ - "The Human Proteome Materlist dataset contains human [taxid:9606] UniProtKB canonical accessions mapped to the reviewed (SwissProt) and unreviewed (TrEMBL) UniProtKB isoforms via gene grouping. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000460/v-2.1.1", - "usability_domain": [ - "The Mouse Germline Mutation dataset contains mouse [taxid:10090] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 338 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000492/v-2.0.2", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000459/v-2.0.2", - "usability_domain": [ - "The Human Germline Mutation dataset contains human [taxid:9606] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000416/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Canonical (UniProtKB) Sequences dataset contains sarscov2 [taxid:2697049] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000643/v-2.0.2", - "usability_domain": [ - "The Fruitfly N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for fruitfly [taxid:7227] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. " - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000024/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] differentially expressed miRNAs with data in cancer samples from TCGA and ICGC - This file contains human [taxid:9606] differentially expressed miRNA in cancer samples. miRNAs are mapped to RefSeq transcripts, and integrated through unification of Disease Ontology (DO) terms an Uberon Anatomical Entity Terms." - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000416/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Canonical (UniProtKB) Sequences dataset contains sarscov2 [taxid:2697049] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000459/v-2.1.1", - "usability_domain": [ - "The Human Germline Mutation dataset contains human [taxid:9606] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000492/v-2.1.1", - "usability_domain": [ - "List of mouse [taxid:10090] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000643/v-2.1.1", - "usability_domain": [ - "The Fruitfly N-Glycosylation Sequon dataset contains N-glycosylation sequon or motif or consensus sequence for fruitfly [taxid:7227] UniProtKB canonical sequences identified by the script. The script identifies the N-glycosylation sequon pattern - Asn-X-Ser/Thr , where X is any amino acid except proline (Pro) for UniProtKB accessions. " - ], - "score": { - "usability_domain_length": 337 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000686/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000429/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Gene Ontology (GO) dataset contains sarscov1 [taxid:694009] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000481/v-2.0.2", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000429/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Gene Ontology (GO) dataset contains sarscov1 [taxid:694009] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2020-01 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000481/v-2.1.1", - "usability_domain": [ - "List of human [taxid:9606] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000686/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Reactions (Reactome) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants, pathway ID, reaction ID, PMID etc from Reactome database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen" - ], - "score": { - "usability_domain_length": 336 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000493/v-2.0.2", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 335 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000461/v-2.0.2", - "usability_domain": [ - "The Rat Germline Mutation dataset contains rat [taxid:10116] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB. " - ], - "score": { - "usability_domain_length": 335 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000461/v-2.1.1", - "usability_domain": [ - "The Rat Germline Mutation dataset contains rat [taxid:10116] germline mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) germline mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB. " - ], - "score": { - "usability_domain_length": 335 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000493/v-2.1.1", - "usability_domain": [ - "List of rat [taxid:10116] proteins with information on glycosylation sites detected from PubMed database(abstracts-only) through Automatic mining tool developed by Dr. Vijay Shanker (University of Delaware). The listed protein (UniProtKB) accessions are part of the GlyGen UniProtKB canonical list (https://data.glygen.org/GLYDS000001)" - ], - "score": { - "usability_domain_length": 335 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000008/1.2", - "usability_domain": [ - "Assembly metadata extracted from NCBI BioProjects.", - "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000008/1.3", - "usability_domain": [ - "Assembly metadata extracted from NCBI BioProjects.", - "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000008/1.4", - "usability_domain": [ - "Assembly metadata extracted from NCBI BioProjects.", - "This table includes metadata from NCBI BioProjects with an additional annotation: taxonomy lineages retrieved from NCBI. The primary use case of this data is to visualize the different organisms and taxonomic lineages that are covered by the projects and have been deposited into SRA." - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000462/v-2.0.2", - "usability_domain": [ - "The Human Somatic Mutation dataset contains human [taxid:9606] somatic mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) somatic mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000646/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Information dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000303/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to KEGG glycan ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.genome.jp/kegg/glycan/" - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000230/v-2.0.2", - "usability_domain": [ - "The Human Congenital Disorders of Glycosylation dataset contains a list of congenital disorders of glycosylation mapped to human [taxid:9606] UniProtKB accessions. The dataset also contains mouse orthologs and links to phenotype page on MGI. If you use this dataset please provide proper attribution to Monarch Initiative and GlyGen. " - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000230/v-2.1.1", - "usability_domain": [ - "The Human Congenital Disorders of Glycosylation dataset contains a list of congenital disorders of glycosylation mapped to human [taxid:9606] UniProtKB accessions. The dataset also contains mouse orthologs and links to phenotype page on MGI. If you use this dataset please provide proper attribution to Monarch Initiative and GlyGen. " - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000303/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to KEGG glycan ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.genome.jp/kegg/glycan/" - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000462/v-2.1.1", - "usability_domain": [ - "The Human Somatic Mutation dataset contains human [taxid:9606] somatic mutation data from EBI Variant Data API. Only nonsynonymous single-nucleotide variations (nsSNVs) somatic mutations are integrated. The dataset is from 2019-11 UniProtKB release .If you use this dataset provide proper attribution to GlyGen and EBI-EMBL-UniProtKB." - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000646/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Information dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 334 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000346/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Canonical (UniProtKB) Sequences dataset contains hcv1a [taxid:11108] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 332 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000302/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to GLYCOSCIENCES.DE ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;http://www.glycosciences.de/" - ], - "score": { - "usability_domain_length": 332 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000302/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to GLYCOSCIENCES.DE ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;http://www.glycosciences.de/" - ], - "score": { - "usability_domain_length": 332 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000346/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Canonical (UniProtKB) Sequences dataset contains hcv1a [taxid:11108] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 332 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000524/v-2.0.2", - "usability_domain": [ - "The Rat Glycation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000421/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov1 [taxid:694009] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000690/v-2.0.2", - "usability_domain": [ - "This dataset contains fruitfly [taxid:7227] pathway information from Reactome database and mapped to the UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen", - "Reactome is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000687/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Reactions (Rhea) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants ID, reaction ID, pathway ID, summary etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen\"" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000421/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Function (UniProtKB) dataset contains biological function annotation for sarscov1 [taxid:694009] protein accesions from the UniProtKB database. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000524/v-2.1.1", - "usability_domain": [ - "The Rat Glycation Sites (UniProtKB) dataset contains mouse [taxid:10116] UniProtKB canonical accessions with information on reported and predicted glycation sites in UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000687/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Reactions (Rhea) dataset contains reactions information for pathways for fruitfly [taxid:7227] UniProtKB accessions that includes reaction participants ID, reaction ID, pathway ID, summary etc from Rhea database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Rhea and GlyGen\"" - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000690/v-2.1.1", - "usability_domain": [ - "This dataset contains fruitfly [taxid:7227] pathway information from Reactome database and mapped to the UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB, Reactome and GlyGen", - "Reactome is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 331 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000034/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Information dataset contains mouse [taxid:10090] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 329 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000034/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Information dataset contains mouse [taxid:10090] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 329 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000019/1.0", - "usability_domain": [ - "List of ngsQC data from HIVE Lab.", - "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results." - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000019/1.1", - "usability_domain": [ - "List of ngsQC data from HIVE Lab.", - "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as SARS-CoV-2 and influenza. ngsQC was carried out using HIVE's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results." - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000033/v-2.0.2", - "usability_domain": [ - "The Human Protein Information dataset contains human [taxid:9606] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000306/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to UniCarbDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://unicarb-db.expasy.org/" - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000033/v-2.1.1", - "usability_domain": [ - "The Human Protein Information dataset contains human [taxid:9606] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000306/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to UniCarbDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://unicarb-db.expasy.org/" - ], - "score": { - "usability_domain_length": 328 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000301/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to GlyConnect ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;https://glyconnect.expasy.org" - ], - "score": { - "usability_domain_length": 327 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000241/v-2.0.2", - "usability_domain": [ - "The Rat Protein Information dataset contains rat [taxid:10116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 327 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000241/v-2.1.1", - "usability_domain": [ - "The Rat Protein Information dataset contains rat [taxid:10116] UniProtKB canonical accessions with their UniProtKB entry [id/mnemonic] name and properties such as mass and length. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 327 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000301/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to GlyConnect ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/;https://glyconnect.expasy.org" - ], - "score": { - "usability_domain_length": 327 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000133/v-2.0.2", - "usability_domain": [ - "The Human Protein Function (NCBI RefSeq) dataset contains biological function annotation of human [taxid:9606] genes from GeneRIF section in the NCBI RefSeq database. The dataset is downloaded from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 326 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000133/v-2.1.1", - "usability_domain": [ - "The Human Protein Function (NCBI RefSeq) dataset contains biological function annotation of human [taxid:9606] genes from GeneRIF section in the NCBI RefSeq database. The dataset is downloaded from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 326 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000134/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Function (NCBI RefSeq) dataset contains biological function annotation of mouse [taxid:10090] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 324 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000134/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Function (NCBI RefSeq) dataset contains biological function annotation of mouse [taxid:10090] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 324 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000058/1.0", - "usability_domain": [ - "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000307/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to UniCarbKB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; http://www.unicarbkb.org/" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000620/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000415/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Canonical (UniProtKB) Sequences dataset contains sarscov1 [taxid:694009] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000107/1.0.36", - "usability_domain": [ - "A0411 is the 23andMe PGS Genetic Health Risk Report for MUTYHAssociated Polyposis is indicated for reporting of the Y179C and the G396D variants in the MUTYH gene. The report describes if a person is at increased risk of developing colorectal cancer. [FTCID:K182784]. This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000307/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to UniCarbKB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; http://www.unicarbkb.org/" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000415/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Canonical (UniProtKB) Sequences dataset contains sarscov1 [taxid:694009] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000620/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to the Glycan Dictionary Accessions for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY000281). Source database: https://wiki.glygen.org/index.php/Glycan_structure_dictionary" - ], - "score": { - "usability_domain_length": 323 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000394/v-2.0.2", - "usability_domain": [ - "The Rat Protein Names (NCBI RefSeq) dataset contains rat [taxid:10116] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 321 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000394/v-2.1.1", - "usability_domain": [ - "The Rat Protein Names (NCBI RefSeq) dataset contains rat [taxid:10116] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 321 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000249/v-2.0.2", - "usability_domain": [ - "The Rat Protein Function (NCBI RefSeq) dataset contains biological function annotation of rat [taxid:10116] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 320 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000399/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Sequence Info (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 320 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000249/v-2.1.1", - "usability_domain": [ - "The Rat Protein Function (NCBI RefSeq) dataset contains biological function annotation of rat [taxid:10116] genes from GeneRIF section in the NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen. " - ], - "score": { - "usability_domain_length": 320 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000399/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Sequence Info (UniProtKB) dataset contains mouse [taxid:10090] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 320 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000025/1.1", - "usability_domain": [ - "List of ngsQC data from Crandall Lab", - "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as Salmonella. ngsQC was carried out using HIVE1's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results. " - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000295/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to CarbBank ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.genome.jp/dbget-bin/www_bfind?carbbank" - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000463/v-2.0.2", - "usability_domain": [ - "The Human Literature Mutation dataset contains human [taxid:9606] mutation data extracted from literature using the DIMEX tool. The dataset contains only the mutations that have amino acid information and position in the mutation mention column.. If you use this dataset provide proper attribution to GlyGen and OncoMX." - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000398/v-2.0.2", - "usability_domain": [ - "The Human Protein Sequence Info (UniProtKB) dataset contains human [taxid:9606] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000295/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to CarbBank ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.genome.jp/dbget-bin/www_bfind?carbbank" - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000398/v-2.1.1", - "usability_domain": [ - "The Human Protein Sequence Info (UniProtKB) dataset contains human [taxid:9606] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000463/v-2.1.1", - "usability_domain": [ - "The Human Literature Mutation dataset contains human [taxid:9606] mutation data extracted from literature using the DIMEX tool. The dataset contains only the mutations that have amino acid information and position in the mutation mention column.. If you use this dataset provide proper attribution to GlyGen and OncoMX." - ], - "score": { - "usability_domain_length": 319 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000650/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein PTM Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. If you use this dataset please give proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 318 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000058/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] biomarkers to measure thrombosis in cancer patients. This file contains 12 biomarkers extracted from the CSSI blood biomarkers retrieved fro the CSSI portal. There are 12 biomarkers to measure thrombosis. https://cssi-dcc.nci.nih.gov/cssiportal/ and https://cssi.cancer.gov/cancer-thrombosis" - ], - "score": { - "usability_domain_length": 318 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000650/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein PTM Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] PTM annotations for UniProtKB canonical accessions. The annotation is the curated statements of mainly about glycosylation and phosphorylation. If you use this dataset please give proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 318 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000025/1.0", - "usability_domain": [ - "List of ngsQC data from Crandall Lab", - "This sheet was created to compile ngsQC data from pathogens in the FDA-ARGOS project as well as several other rapidly emerging pathogens such as Salmonella. ngsQC was carried out using HIVE1's Multiple NGS QC tool. The primary use case for this data set is to explore ngsQC results" - ], - "score": { - "usability_domain_length": 317 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000013/v-2.0.2", - "usability_domain": [ - "The Mouse EBI-UniProtKB NT file contains proteome data for mouse [taxid:10090] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 317 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000013/v-2.1.1", - "usability_domain": [ - "The Mouse EBI-UniProtKB NT file contains proteome data for mouse [taxid:10090] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 317 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000358/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1b [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000400/v-2.0.2", - "usability_domain": [ - "The Rat Protein Sequence Info (UniProtKB) dataset contains rat [taxid:10116] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000053/v-2.0.2", - "usability_domain": [ - "The Human Protein Isoform sequences (UniProtKB) dataset contains human [taxid:9606] protein fasta sequences for the human isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000054/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Isoform sequences (UniProtKB) dataset contains mouse [taxid:10090] protein fasta sequences for the mouse isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000053/v-2.1.1", - "usability_domain": [ - "The Human Protein Isoform sequences (UniProtKB) dataset contains human [taxid:9606] protein fasta sequences for the human isoform accessions from the UniProtKB database,. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000054/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Isoform sequences (UniProtKB) dataset contains mouse [taxid:10090] protein fasta sequences for the mouse isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000358/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1b [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000400/v-2.1.1", - "usability_domain": [ - "The Rat Protein Sequence Info (UniProtKB) dataset contains rat [taxid:10116] UniProtKB protein fasta sequence information that includes sequence version and fasta header. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 316 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000360/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1b [taxid:11116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 315 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000003/v-2.0.2", - "usability_domain": [ - "The Human EBI-UniProtKB NT file contains proteome data for human [taxid:9606] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 315 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000003/v-2.1.1", - "usability_domain": [ - "The Human EBI-UniProtKB NT file contains proteome data for human [taxid:9606] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 315 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000360/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1b, isolate Japanese) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1b [taxid:11116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 315 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000674/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref FlyBase contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to FlyBase database accessions/identifiers. If you use this dataset please provide proper attribution to FlyBase and GlyGen", - "FlyBase is a database for Drosophila gene and genomes. https://flybase.org/" - ], - "score": { - "usability_domain_length": 314 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000439/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 314 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000439/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCov) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 314 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000674/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref FlyBase contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to FlyBase database accessions/identifiers. If you use this dataset please provide proper attribution to FlyBase and GlyGen", - "FlyBase is a database for Drosophila gene and genomes. https://flybase.org/" - ], - "score": { - "usability_domain_length": 314 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.0", - "usability_domain": [ - "QC of short read sequences assembled into genomes.", - "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.1", - "usability_domain": [ - "QC of short read sequences assembled into genomes.", - "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.2", - "usability_domain": [ - "QC of short read sequences assembled into genomes.", - "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000012/1.4", - "usability_domain": [ - "QC of short read sequences assembled into genomes.", - "Data is pulled from NCBI and run through a self-alignment/variant calling pipeline to screen for inconsistencies. Results are compared to published metadata on NCBI for each assembly. The primary use case for this data set is to explore QC metrics for assemblies." - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000496/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000304/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to PDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.rcsb.org/" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000388/v-2.0.2", - "usability_domain": [ - "The Mouse Gene Symbols (NCBI RefSeq) dataset contains mouse [taxid:10090] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000304/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to PDB ID(s) for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://www.glycome-db.org/; https://www.rcsb.org/" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000388/v-2.1.1", - "usability_domain": [ - "The Mouse Gene Symbols (NCBI RefSeq) dataset contains mouse [taxid:10090] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000496/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-2 or 2019-nCoV) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov2 [taxid:2697049] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 313 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000255/v-2.0.2", - "usability_domain": [ - "The Rat Protein Isoform sequences (UniProtKB) dataset contains rat [taxid:10116] protein fasta sequences for the rat isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000252/v-2.0.2", - "usability_domain": [ - "The Mouse Gene Ontology (GO) dataset contains mouse [taxid:10090] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000278/v-2.0.2", - "usability_domain": [ - "The Rat EBI-UniProtKB NT file contains proteome data for rat [taxid:10116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000387/v-2.0.2", - "usability_domain": [ - "The Human Gene Symbols (NCBI RefSeq) dataset contains human [taxid:9606] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000252/v-2.1.1", - "usability_domain": [ - "The Mouse Gene Ontology (GO) dataset contains mouse [taxid:10090] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000255/v-2.1.1", - "usability_domain": [ - "The Rat Protein Isoform sequences (UniProtKB) dataset contains rat [taxid:10116] protein fasta sequences for the rat isoform accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000278/v-2.1.1", - "usability_domain": [ - "The Rat EBI-UniProtKB NT file contains proteome data for rat [taxid:10116] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000387/v-2.1.1", - "usability_domain": [ - "The Human Gene Symbols (NCBI RefSeq) dataset contains human [taxid:9606] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 312 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000258/v-2.0.2", - "usability_domain": [ - "The Homolog Clusters dataset contains homologs of human [taxid: 9606], mouse [taxid:10090] and rat [taxid:10116] species mapped to UniProtKB canonical accessions. The homologs data is downloaded from OMA browser and MGI. If you use this dataset please provide proper attribution to MGI, OMA Browser and GlyGen. " - ], - "score": { - "usability_domain_length": 311 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000258/v-2.1.1", - "usability_domain": [ - "The Homolog Clusters dataset contains homologs of human [taxid: 9606], mouse [taxid:10090] and rat [taxid:10116] species mapped to UniProtKB canonical accessions. The homologs data is downloaded from OMA browser and MGI. If you use this dataset please provide proper attribution to MGI, OMA Browser and GlyGen. " - ], - "score": { - "usability_domain_length": 311 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000389/v-2.0.2", - "usability_domain": [ - "The Rat Gene Symbols (NCBI RefSeq) dataset contains rat [taxid:10116] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 310 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000654/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Site Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 310 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000389/v-2.1.1", - "usability_domain": [ - "The Rat Gene Symbols (NCBI RefSeq) dataset contains rat [taxid:10116] gene symbols/names (primary gene name and gene synonyms) from NCBI RefSeq database. The dataset is derived from NCBI RefSeq Release 96, September 9, 2019. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 310 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000654/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Site Annotation (UniProtKB) dataset contains fruitfly [taxid:7227] site annotations for UniProtKB canonical accessions. The dataset contains information about UniProtKB annotations with their amino acid range. If you use this dataset please give proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 310 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000212/2.9", - "usability_domain": [ - " \"Identify treatment emergent amino acid (substitutions)[SO:1000002] that correlate with antiviral drug treatment failure\",", - " \"Determine whether the treatment emergent amino acid (substitutions)[SO:1000002] identified correlate with treatment failure involving other drugs against the same virus\"," - ], - "score": { - "usability_domain_length": 309 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000223/v-2.0.2", - "usability_domain": [ - "The Human Gene Ontology (GO) dataset contains human [taxid:9606] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 309 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000357/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 309 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000223/v-2.1.1", - "usability_domain": [ - "The Human Gene Ontology (GO) dataset contains human [taxid:9606] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 309 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000357/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 309 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000359/v-2.0.2", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000682/v-2.0.2", - "usability_domain": [ - "The Fruitfly Phosphorylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000285/v-2.0.2", - "usability_domain": [ - "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlyTouCan and UniCarbKB database.The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.unicarbkb.org/" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000242/v-2.0.2", - "usability_domain": [ - "The Rat Gene Ontology (GO) dataset contains rat [taxid:10116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000242/v-2.1.1", - "usability_domain": [ - "The Rat Gene Ontology (GO) dataset contains rat [taxid:10116] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000285/v-2.1.1", - "usability_domain": [ - "The dataset provides citations for the associated glycan (GlyTouCan Accession) from GlyTouCan and UniCarbKB database.The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.unicarbkb.org/" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000359/v-2.1.1", - "usability_domain": [ - "The Hepatitis C virus (genotype 1a, isolate H) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for hcv1a [taxid:11108] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000682/v-2.1.1", - "usability_domain": [ - "The Fruitfly Phosphorylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted phosphoryation sites with evidence in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 308 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000282/v-2.0.2", - "usability_domain": [ - "The dataset provides information on the glycan classification such as glycan type and sub-type for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 307 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000282/v-2.1.1", - "usability_domain": [ - "The dataset provides information on the glycan classification such as glycan type and sub-type for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 307 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000132/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Function (UniProtKB) dataset contains biological function annotation for mouse [taxid:10090] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 306 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000021/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] genes with cancer data in BioMuta and/or BioXpress - This file contains human [taxid:9606] genes with associated cancer data in BioMuta and/or BioXpress cancer mutation and expression databases. Genes are mapped to canonical UniProtKB/Swiss-Prot accessions and RefSeq accession. " - ], - "score": { - "usability_domain_length": 306 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000132/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Function (UniProtKB) dataset contains biological function annotation for mouse [taxid:10090] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 306 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000294/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to BCSDB ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://csdb.glycoscience.ru/bacterial/" - ], - "score": { - "usability_domain_length": 305 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000131/v-2.0.2", - "usability_domain": [ - "The Human Protein Function (UniProtKB) dataset contains biological function annotation for human [taxid:9606] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 305 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000039/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] genes with cancer data in BioMuta and/or BioXpress - This file contains human [taxid:9606] genes with associated cancer data in BioMuta and/or BioXpress cancer mutation and expression databases. Genes are mapped to canonical UniProtKB/Swiss-Prot accessions and RefSeq accession." - ], - "score": { - "usability_domain_length": 305 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000131/v-2.1.1", - "usability_domain": [ - "The Human Protein Function (UniProtKB) dataset contains biological function annotation for human [taxid:9606] protein accessions from the UniProtKB database. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 305 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000294/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to BCSDB ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://csdb.glycoscience.ru/bacterial/" - ], - "score": { - "usability_domain_length": 305 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000247/v-2.0.2", - "usability_domain": [ - "The Rat Protein Function (UniProtKB) dataset contains biological functional annotations for rat [taxid:10116] protein accessions from the UniProtKB database. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 303 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000247/v-2.1.1", - "usability_domain": [ - "The Rat Protein Function (UniProtKB) dataset contains biological functional annotations for rat [taxid:10116] protein accessions from the UniProtKB database. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 303 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000478/v-2.0.2", - "usability_domain": [ - "The dataset provides relationship between the glycans (identified by the GlyTouCan accession). The relation is categorized as either Ancestor, Descendant, SubsumedBy or Subsumes. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." - ], - "score": { - "usability_domain_length": 302 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000478/v-2.1.1", - "usability_domain": [ - "The dataset provides relationship between the glycans (identified by the GlyTouCan accession). The relation is categorized as either Ancestor, Descendant, SubsumedBy or Subsumes. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." - ], - "score": { - "usability_domain_length": 302 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000033/1.0.25", - "usability_domain": [ - "The glycotyper dataset displays normalized intensity values detected and quantified by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for N-glycans of specific serum proteins across Hepatocellular carcinoma and other patient cohorts such as transplant and cirrhosis." - ], - "score": { - "usability_domain_length": 301 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000046/1.0.36", - "usability_domain": [ - "The glycotyper dataset displays normalized intensity values detected and quantified by Matrix Assisted Laser Desorption Ionization Mass Spectrometry Imaging (MALDI-MSI) for N-glycans of specific serum proteins across Hepatocellular carcinoma and other patient cohorts such as transplant and cirrhosis." - ], - "score": { - "usability_domain_length": 301 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000438/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 300 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000438/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 300 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000413/v-2.0.2", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000298/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to CFG ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.functionalglycomics.org/" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000598/v-2.0.2", - "usability_domain": [ - "The Mouse UniProtKB Xref Rhea contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000097/1.0.36", - "usability_domain": [ - "A0401 is a panel for PCA3 score of measured protein mRNA level ratio as predictive biomarker for prostate cancer patients. A PCA3 Score <25 is associated with a decreased likelihood of a positive biopsy. Prostatic biopsy is required for diagnosis of cancer. The proteins are PCA3, PSA (UPKB:P07288)." - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000298/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to CFG ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; http://www.functionalglycomics.org/" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000413/v-2.1.1", - "usability_domain": [ - "The SARS coronavirus (SARS-CoV-1) Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for sarscov1 [taxid:694009] proteins. The dataset is derived from 2020-01 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000598/v-2.1.1", - "usability_domain": [ - "The Mouse UniProtKB Xref Rhea contains mouse [taxid:10090] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 299 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000597/v-2.0.2", - "usability_domain": [ - "The Human UniProtKB Xref Rhea contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 298 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000597/v-2.1.1", - "usability_domain": [ - "The Human UniProtKB Xref Rhea contains human [taxid:9606] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 298 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000337/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to GlycoEpitope for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.glycoepitope.jp/" - ], - "score": { - "usability_domain_length": 296 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000009/1.0.25", - "usability_domain": [ - "Human Cancer Disease Ontology Slim IDs (V2.1) directly mapped to UBERON Anatomical Entity IDs - The csv file human_doid_uberon_mapping.csv contains slim-level Disease Ontology IDs (DOIDs) from the Cancer DO Slim project mapped to the corresponding tissue in the UBERON Anatomical Entity Ontology." - ], - "score": { - "usability_domain_length": 296 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000337/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to GlycoEpitope for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/; https://www.glycoepitope.jp/" - ], - "score": { - "usability_domain_length": 296 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000525/v-2.0.2", - "usability_domain": [ - "The Human Disease Ontology Mapping dataset contains human [taxid:9606] UniProtKB canonical accession cross-reference mappings to the Disease Ontology Ids (DOID)", - "If you use this dataset please provide proper attribution to GlyGen and Disease ontology, Monarch Disease Ontology and EMBL-EBI-UniProt" - ], - "score": { - "usability_domain_length": 295 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000086/1.0.36", - "usability_domain": [ - "A0390 is a biomarker panel of identified gene predictive biomarkers (mutations) in ovarian cancer (DOID:2394)patients with deleterious or suspected deleterious germline BRCA variants. The genes are BRCA1 (UPKB:P38398), BRCA2 (UPKB:P51587). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 295 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000525/v-2.1.1", - "usability_domain": [ - "The Human Disease Ontology Mapping dataset contains human [taxid:9606] UniProtKB canonical accession cross-reference mappings to the Disease Ontology Ids (DOID)", - "If you use this dataset please provide proper attribution to GlyGen and Disease ontology, Monarch Disease Ontology and EMBL-EBI-UniProt" - ], - "score": { - "usability_domain_length": 295 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000470/1.0", - "usability_domain": [ - "This BCO draft was generated by the Nextflow engine, and revised at the BCO Portal. Additional documentation about the Nextflow-BCO connection is here: https://github.com/nextflow-io/nf-prov\n\nAdditional documentation about the RNAseq-NF pipeline is here:\nhttps://github.com/nextflow-io/rnaseq-nf" - ], - "score": { - "usability_domain_length": 295 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000002/v-2.0.2", - "usability_domain": [ - "The Human Protein Canonical (UniProtKB) Sequences dataset contains human [taxid:9606] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 294 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000002/v-2.1.1", - "usability_domain": [ - "The Human Protein Canonical (UniProtKB) Sequences dataset contains human [taxid:9606] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 294 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000684/v-2.0.2", - "usability_domain": [ - "The Fruitfly Glycosyltransferases dataset contains a list of fruitfly [taxid:7227] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000599/v-2.0.2", - "usability_domain": [ - "The Rat UniProtKB Xref Rhea contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Rhea" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000338/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to MatrixDB for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://matrixdb.univ-lyon1.fr/" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000338/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to MatrixDB for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/;http://matrixdb.univ-lyon1.fr/" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000599/v-2.1.1", - "usability_domain": [ - "The Rat UniProtKB Xref Rhea contains rat [taxid:10116] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen", - "Rhea" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000684/v-2.1.1", - "usability_domain": [ - "The Fruitfly Glycosyltransferases dataset contains a list of fruitfly [taxid:7227] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 293 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000333/v-2.0.2", - "usability_domain": [ - "The Mouse Interaction data (MatrixDB) dataset contains interaction data between Mouse [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 292 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000333/v-2.1.1", - "usability_domain": [ - "The Mouse Interaction data (MatrixDB) dataset contains interaction data between Mouse [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 292 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000240/v-2.0.2", - "usability_domain": [ - "The Rat Protein Canonical Sequences (UniProtKB) dataset contains rat [taxid:10116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000332/v-2.0.2", - "usability_domain": [ - "The Human Interaction data (MatrixDB) dataset contains interaction data between Human [taxid:9606] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000681/v-2.0.2", - "usability_domain": [ - "The Fruitfly Glycosylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000095/1.0.36", - "usability_domain": [ - "A0399 Panel of identified chromosome somatic mutants (aneuploidy) and locus mutant (loss) monitoring biomarkers in urinary bladder (DOID:11054) cancer. The chromosomes are chromosme 3, chromosme 7, chromosme 17, and the locus is 9p21. This panel is curated from the FDA Approved biomarkers" - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000240/v-2.1.1", - "usability_domain": [ - "The Rat Protein Canonical Sequences (UniProtKB) dataset contains rat [taxid:10116] canonical protein fasta sequences from the UniProtKB database. The dataset is derived from 2019_09 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000332/v-2.1.1", - "usability_domain": [ - "The Human Interaction data (MatrixDB) dataset contains interaction data between Human [taxid:9606] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000681/v-2.1.1", - "usability_domain": [ - "The Fruitfly Glycosylation Sites (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB canonical accessions with information on reported and predicted glycosylation sites in UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 291 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000030/v-2.0.2", - "usability_domain": [ - "The Mouse Glycosyltransferases dataset contains a list of mouse [taxid:10090] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 289 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000030/v-2.1.1", - "usability_domain": [ - "The Mouse Glycosyltransferases dataset contains a list of mouse [taxid:10090] Glycosyltransferases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 289 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000334/v-2.0.2", - "usability_domain": [ - "The Rat Interaction data (MatrixDB) dataset contains interaction data between Rat [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 288 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000477/v-2.0.2", - "usability_domain": [ - "The dataset provides details (such as image size, image notation, image style, image format)for the glycan (identified by GlyTouCan accession) images used in GlyGen. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" - ], - "score": { - "usability_domain_length": 288 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000334/v-2.1.1", - "usability_domain": [ - "The Rat Interaction data (MatrixDB) dataset contains interaction data between Rat [taxid:10090] proteins and Glycosaminoglycans (GAGs) from MatrixDB database (http://matrixdb.univ-lyon1.fr/ ;PMID:19147664). If you use this dataset please provide proper attribution to MatrixDB and GlyGen." - ], - "score": { - "usability_domain_length": 288 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000477/v-2.1.1", - "usability_domain": [ - "The dataset provides details (such as image size, image notation, image style, image format)for the glycan (identified by GlyTouCan accession) images used in GlyGen. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" - ], - "score": { - "usability_domain_length": 288 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000280/v-2.0.2", - "usability_domain": [ - "The Rat Glycohydrolases dataset contains a list of rat [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 285 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000280/v-2.1.1", - "usability_domain": [ - "The Rat Glycohydrolases dataset contains a list of rat [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 285 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000685/v-2.0.2", - "usability_domain": [ - "The Fruitfly Glycohydrolases dataset contains a list of fruitfly [taxid:7227] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 283 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000685/v-2.1.1", - "usability_domain": [ - "The Fruitfly Glycohydrolases dataset contains a list of fruitfly [taxid:7227] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam.If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 283 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000711/v-2.1.1", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains fruitfly (taxid:7227) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." - ], - "score": { - "usability_domain_length": 282 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000025/v-2.0.2", - "usability_domain": [ - "The Human Glycohydrolases dataset contains a list of human [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 280 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000292/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in WURCS extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 280 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000290/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in IUPAC extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 280 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000025/v-2.1.1", - "usability_domain": [ - "The Human Glycohydrolases dataset contains a list of human [taxid:9606] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. . If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 280 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000283/v-2.1.1", - "usability_domain": [ - "The dataset provides information on the glycan motifs for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281) and from GlycoMotif (https://glycomotif.glyomics.org/)." - ], - "score": { - "usability_domain_length": 280 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000026/v-2.0.2", - "usability_domain": [ - "The Mouse Glycohydrolases dataset contains a list of mouse [taxid:10090] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 279 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000287/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in Glycam IUPAC format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://dev.glycam.org/" - ], - "score": { - "usability_domain_length": 279 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000026/v-2.1.1", - "usability_domain": [ - "The Mouse Glycohydrolases dataset contains a list of mouse [taxid:10090] glycohydrolases derived from UniProtKB database using advanced search and other databases including CAZy, InterPro and Pfam. If you use this dataset please provide proper attribution to UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 279 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000290/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in IUPAC extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 279 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000292/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in WURCS extended format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 279 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000046/1.0", - "usability_domain": [ - "A SARS-CoV-2 complete genome fasta file was derived from UniProt (UniProt ID: UP000464024), processed on the Argos Project server, and will be available on the front end data website for the Argos Project, data.argosdb.org. The complete genome contains information for 17 genes." - ], - "score": { - "usability_domain_length": 278 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000287/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in Glycam IUPAC format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://dev.glycam.org/" - ], - "score": { - "usability_domain_length": 278 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000709/v-2.1.1", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains mouse (taxid:10090) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." - ], - "score": { - "usability_domain_length": 277 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000708/v-2.1.1", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains human (taxid:9606) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO." - ], - "score": { - "usability_domain_length": 276 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000559/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in Byonic format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://dev.glycam.org/" - ], - "score": { - "usability_domain_length": 273 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000578/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to GPTWiki ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database:https://gptwiki.glyomics.org/" - ], - "score": { - "usability_domain_length": 273 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000288/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in GlycoCT format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 273 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000559/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in Byonic format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://dev.glycam.org/" - ], - "score": { - "usability_domain_length": 273 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000578/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to GPTWiki ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database:https://gptwiki.glyomics.org/" - ], - "score": { - "usability_domain_length": 273 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000511/v-2.0.2", - "usability_domain": [ - "The Human Proteoform Citations (Literature) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000143. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000088/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000703/v-2.0.2", - "usability_domain": [ - "The dataset contains pathway data for all N-glycans which are fully mapped to the GlycoTree framework. Each pathway specifies details for a particular reaction catalyzed by enzymes (UniProtKB Accession) which effect the associated glycan structures (GlyTouCan Accessions)." - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000088/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000511/v-2.1.1", - "usability_domain": [ - "The Human Proteoform Citations (Literature) dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000143. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000703/v-2.1.1", - "usability_domain": [ - "The dataset contains pathway data for all N-glycans which are fully mapped to the GlycoTree framework. Each pathway specifies details for a particular reaction catalyzed by enzymes (UniProtKB Accession) which effect the associated glycan structures (GlyTouCan Accessions)." - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000710/v-2.1.1", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) contains rat (taxid:10116) O-GlcNAc modifications on Serine and Threonine extracted from the publication and mapped to the UniProtKB canonical accession and fasta sequence. The GlyTouCan ID for the O-GlcNAc is G49108TO" - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000288/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in GlycoCT format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 272 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000648/v-2.0.2", - "usability_domain": [ - "The Fruitfly EBI-UniProtKB NT file contains proteome data for fruitfly [taxid:7227] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. T If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000032/v-2.0.2", - "usability_domain": [ - "The Mouse Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000087/v-2.0.2", - "usability_domain": [ - "The Human Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000032/v-2.1.1", - "usability_domain": [ - "The Mouse Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for mouse [taxid:10090] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000087/v-2.1.1", - "usability_domain": [ - "The Human Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000648/v-2.1.1", - "usability_domain": [ - "The Fruitfly EBI-UniProtKB NT file contains proteome data for fruitfly [taxid:7227] from UniProtKB and other EMBL-EBI resources as per the GlyGen specifications in the NT format. T If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 271 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000222/v-2.0.2", - "usability_domain": [ - "The Rat Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for rat [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000652/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Sequence Info (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB protein fasta sequence information that includes sequence version and fasta header. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000031/v-2.0.2", - "usability_domain": [ - "The Human Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000110/1.0.36", - "usability_domain": [ - "A0435 is a Fibrosis vs Cirrhosis Biomarker Panel provided by Aswini Panigrahi from Georgetown University. This is a ratio of relative abundance of the di- and mono-sialylated O-glycoforms of HPX which shows a significant increase with the progression of liver disease. " - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000031/v-2.1.1", - "usability_domain": [ - "The Human Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for human [taxid:9606] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000222/v-2.1.1", - "usability_domain": [ - "The Rat Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for rat [taxid:10116] proteins. The dataset is derived from 2019-09 UniProtKB release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000652/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Sequence Info (UniProtKB) dataset contains fruitfly [taxid:7227] UniProtKB protein fasta sequence information that includes sequence version and fasta header. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 270 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000634/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Isoform sequences (UniProtKB) dataset contains fruitfly [taxid:7227] protein fasta sequences for the fruitfly isoform accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 269 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000579/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to Glycosmos ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glycosmos.org/" - ], - "score": { - "usability_domain_length": 269 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000579/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to Glycosmos ID's for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glycosmos.org/" - ], - "score": { - "usability_domain_length": 269 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000634/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Isoform sequences (UniProtKB) dataset contains fruitfly [taxid:7227] protein fasta sequences for the fruitfly isoform accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 269 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000283/v-2.0.2", - "usability_domain": [ - "The dataset provides information on the glycan motifs for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 266 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000265/v-2.0.2", - "usability_domain": [ - "The Rat Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for rat [taxid:10116] proteins. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 265 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000265/v-2.1.1", - "usability_domain": [ - "The Rat Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for rat [taxid:10116] proteins. The dataset is derived from 2019-05 UniProt release. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 265 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000001/1.5", - "usability_domain": [ - "Identify baseline single nucleotide polymorphisms (SNPs)[SO:0000694], (insertions)[SO:0000667], and (deletions)[SO:0000045] that correlate with reduced (ledipasvir)[pubchem.compound:67505836] antiviral drug efficacy in (Hepatitis C virus subtype 1)[taxonomy:31646]" - ], - "score": { - "usability_domain_length": 264 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000013/1.0.25", - "usability_domain": [ - " List of mouse [taxid:10090] genes with normal RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx" - ], - "score": { - "usability_domain_length": 264 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000644/v-2.0.2", - "usability_domain": [ - "The Fruitfly Gene Ontology (GO) dataset contains fruitfly [taxid:7227] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 262 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000012/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] genes with normal RNA-Seq and Affymetrix expression data in Bgee; additional documentation available at https://github.com/BgeeDB/bgee_pipeline/tree/develop/pipeline/collaboration/oncoMX#information-about-the-files-generated-for-oncomx" - ], - "score": { - "usability_domain_length": 262 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000644/v-2.1.1", - "usability_domain": [ - "The Fruitfly Gene Ontology (GO) dataset contains fruitfly [taxid:7227] Gene Ontology Id and name, categories and evidence information for UniProtKB canonical accessions. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 262 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000497/v-2.0.2", - "usability_domain": [ - "The dataset provides the byonic format of n-linked human glycans (identified by GlyTouCan Ac.). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/. " - ], - "score": { - "usability_domain_length": 261 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000497/v-2.1.1", - "usability_domain": [ - "The dataset provides the byonic format of n-linked human glycans (identified by GlyTouCan Ac.). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/. " - ], - "score": { - "usability_domain_length": 261 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000340/v-2.0.2", - "usability_domain": [ - "The dataset provides names for the associated glycan (GlyTouCan Accession) in different formats. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 260 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000340/v-2.1.1", - "usability_domain": [ - "The dataset provides names for the associated glycan (GlyTouCan Accession) in different formats. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 260 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000649/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 258 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000649/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] protein names (primary and other alternative/synonym protein names) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 258 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000715/v-2.1.1", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000711, contributed by Jungfeng Ma's group." - ], - "score": { - "usability_domain_length": 257 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000639/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Function (UniProtKB) dataset contains biological function annotation for fruitfly [taxid:7227] protein accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 256 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000035/1.0.25", - "usability_domain": [ - "Table of gene preferential expression profiles in cancer cells. The csv file human_cancer_scRNA_preferential_expression contains expression specificity scores, expression specificity annotations, and associated biological information for cancer cell types." - ], - "score": { - "usability_domain_length": 256 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000639/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Function (UniProtKB) dataset contains biological function annotation for fruitfly [taxid:7227] protein accessions from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. " - ], - "score": { - "usability_domain_length": 256 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000702/v-2.0.2", - "usability_domain": [ - "The Fruitfly NCBI Protein Linkouts contains fruitfly [taxid:7227] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID." - ], - "score": { - "usability_domain_length": 253 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000074/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for head and Neck squamous cell carcinoma patients in the TCGA-HNSC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 253 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000702/v-2.1.1", - "usability_domain": [ - "The Fruitfly NCBI Protein Linkouts contains fruitfly [taxid:7227] NCBI RefSeq accessions with their corresponding GlyGen protein detail page URLs. The dataset is formatted as per NCBI non-bib. linkouts guidelines where 10277 is the GlyGen's provider ID." - ], - "score": { - "usability_domain_length": 253 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000683/v-2.0.2", - "usability_domain": [ - "This dataset contains fruitfly [taxid:7227] phosphorylation sites from iPTMNet database. The protein and sites are mapped to UniProtKB canonical accessions and fasta sequence. If you use this dataset please provide proper attribution to iPTMnet-GlyGen." - ], - "score": { - "usability_domain_length": 252 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000068/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for uterine corpus endometrial carcinoma patients in the TCGA-UCEC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 252 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000683/v-2.1.1", - "usability_domain": [ - "This dataset contains fruitfly [taxid:7227] phosphorylation sites from iPTMNet database. The protein and sites are mapped to UniProtKB canonical accessions and fasta sequence. If you use this dataset please provide proper attribution to iPTMnet-GlyGen." - ], - "score": { - "usability_domain_length": 252 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000515/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file sarscov1_proteoform_glycosylation_sites_literature.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 251 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000515/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file sarscov1_proteoform_glycosylation_sites_literature.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 251 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000717/v-2.1.1", - "usability_domain": [ - "The O-Glucosylation Glycosylation Citations dataset contains details of the publication present in the dataset Human O-Glucosylation Glycosylation Sites https://data.glygen.org/GLY_000716, contributed by Daniel Williamson from Bob Haltiwanger's group." - ], - "score": { - "usability_domain_length": 251 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000712/v-2.1.1", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000708, contributed by Jungfeng Ma's group." - ], - "score": { - "usability_domain_length": 251 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000713/v-2.1.1", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000709, contributed by Jungfeng Ma's group." - ], - "score": { - "usability_domain_length": 251 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000514/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file human_proteoform_glycosylation_sites_tyr_o_linked.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 250 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000514/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file human_proteoform_glycosylation_sites_tyr_o_linked.csv. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 250 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000698/v-2.0.2", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Rhea contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 249 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000073/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for kidney renal clear cell carcinoma patients in the TCGA-KIRC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 249 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000698/v-2.1.1", - "usability_domain": [ - "The Fruitfly UniProtKB Xref Rhea contains fruitfly [taxid:7227] UniProtKB canonical accessions cross-referenced to Rhea database accessions/identifiers. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen. ", - "Rhea" - ], - "score": { - "usability_domain_length": 249 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000714/v-2.1.1", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Citations (O-GlcNAc Atlas) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (O-GlcNAc Atlas) https://data.glygen.org/GLY_000710, contributed by Jungfeng Ma's group." - ], - "score": { - "usability_domain_length": 247 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000015/1.0.25", - "usability_domain": [ - "Master results table of cancer cell-type expression specificity data. The csv file human_cancer_scRNA_expression contains expression specificity scores, expression specificity annotations, and associated disease information for cancer cell types." - ], - "score": { - "usability_domain_length": 246 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000072/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for liver hepatocellular carcinoma patients in the TCGA-LIHC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 246 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000491/v-2.0.2", - "usability_domain": [ - "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000636/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Canonical (UniProtKB) Sequences dataset contains fruitfly [taxid:7227] canonical protein fasta sequences from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000339/v-2.0.2", - "usability_domain": [ - "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000339/v-2.1.1", - "usability_domain": [ - "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000491/v-2.1.1", - "usability_domain": [ - "The dataset provides SNFG images for the associated glycan (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). Source database: https://glytoucan.org/" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000636/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Canonical (UniProtKB) Sequences dataset contains fruitfly [taxid:7227] canonical protein fasta sequences from the UniProtKB database. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000600/v-2.1.1", - "usability_domain": [ - "-", - "The dataset provides evidence for a library of complex multiantennary Asn-linked N-glycans generated by chemo-enzymatic synthesis (PMID:30745240). The data was contributed by the Richard Cummings Laboratory (NCFG BIDMC Harvard Medical School)." - ], - "score": { - "usability_domain_length": 245 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000642/v-2.0.2", - "usability_domain": [ - "The Fruitfly Gene Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] gene Names/names (primary gene name and gene synonyms) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 244 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000076/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for bladder urothelial carcinoma patients in the TCGA-BLCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 244 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000069/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for lung squamous cell carcinoma patients in the TCGA-LUSC study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 244 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000075/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for kidney chromophobe carcinoma patients in the TCGA-KICH study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 244 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000642/v-2.1.1", - "usability_domain": [ - "The Fruitfly Gene Names (NCBI RefSeq) dataset contains fruitfly [taxid:7227] gene Names/names (primary gene name and gene synonyms) from NCBI RefSeq database. If you use this dataset please provide proper attribution to NCBI RefSeq and GlyGen" - ], - "score": { - "usability_domain_length": 244 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000326/v-2.0.2", - "usability_domain": [ - "The dataset provides a set of glycans which are fully determined. Each glycan is associated with a GlyTouCan Accession. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." - ], - "score": { - "usability_domain_length": 243 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000326/v-2.1.1", - "usability_domain": [ - "The dataset provides a set of glycans which are fully determined. Each glycan is associated with a GlyTouCan Accession. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)." - ], - "score": { - "usability_domain_length": 243 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000676/v-2.0.2", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (MCW) httpss://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 242 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000676/v-2.1.1", - "usability_domain": [ - "The Fruitfly O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Fruitfly O-GlcNAc Glycosylation Sites (MCW) httpss://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 242 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000580/v-2.0.2", - "usability_domain": [ - "The dataset provides the glycan sequences in GlycoWorkBench format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281). " - ], - "score": { - "usability_domain_length": 241 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000580/v-2.1.1", - "usability_domain": [ - "The dataset provides the glycan sequences in GlycoWorkBench format for the associated glycans (GlyTouCan Accession). The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLY_000281)." - ], - "score": { - "usability_domain_length": 239 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000066/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for esophageal carcinoma patients in the TCGA-ESCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 236 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000518/v-2.0.2", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (MCW) https://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 235 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000071/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for lung adenocarcinoma patients in the TCGA-LUAD study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 235 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000518/v-2.1.1", - "usability_domain": [ - "The Human O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Human O-GlcNAc Glycosylation Sites (MCW) https://data.glygen.org/GLY_000517, contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 235 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000070/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for thyroid carcinoma patients in the TCGA-THCA study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 233 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000573/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000574/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000516/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000513/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000142. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000512/v-2.0.2", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000335. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000512/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000335. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000513/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000142. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000516/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000573/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000574/v-2.1.1", - "usability_domain": [ - "The dataset provides citation details corresponding to the PubMed ID (PMID) from file https://data.glygen.org/GLYDS000481. The script accesses the NCBI ENTREZ infrastructure to retrieve the publication information of the given PMID." - ], - "score": { - "usability_domain_length": 232 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000067/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for prostate cancer patients in the TCGA-PRAD study. The dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 231 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000044/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] mRNAs and miRNAs with literature evidence of expression in cancer - This file contains human [taxid:9606] mRNAs and miRNAs with reported expression in cancer mined from abstracts in PubMed using DEXTER." - ], - "score": { - "usability_domain_length": 229 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000065/1.0.36", - "usability_domain": [ - "A list of normalized TPM gene expression from RNA-seq and corresponding survival data for breast cancer patients in the TCGA-BRCA study. Te dataset can be used for performing survival analysis and generating Kaplan-Meier graphs." - ], - "score": { - "usability_domain_length": 228 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000089/1.0.36", - "usability_domain": [ - "A0393 is a panel of identified gene predictive biomarkers (mutations) in colorectal cancer (DOID:9256). The genes are KRAS (UPKB:P01116), NRAS (GTPase NRas gene (NRAS). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 224 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000014/1.2", - "usability_domain": [ - "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", - "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000014/1.3", - "usability_domain": [ - "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", - "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000014/1.4", - "usability_domain": [ - "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", - "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000014/1.5", - "usability_domain": [ - "Annotation file with lineage specific mutations mapped to SARS-CoV-2 (Wuhan-Hu-1) reference proteins.", - "The primary use case for this data set is to see how the mutations might affect protein function or vaccine/drug targets." - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000651/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000651/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Recommended Names (UniProtKB) dataset contains UniProtKB recommended names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 222 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000635/v-2.0.2", - "usability_domain": [ - "The Fruitfly Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 221 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000635/v-2.1.1", - "usability_domain": [ - "The Fruitfly Protein Alternative Name (UniProtKB) dataset contains UniProtKB alternative names for fruitfly [taxid:7227] proteins. If you use this dataset please provide proper attribution to EMBL-EBI-UniProtKB and GlyGen" - ], - "score": { - "usability_domain_length": 221 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000336/v-2.0.2", - "usability_domain": [ - "The Mouse (TaxID:10090) Xref GlycoProtDB contains x-refs to IDs present in the GlycoProtDB database (https://acgg.asia/gpdb2/index). If you use this dataset please provide proper attribution to GlycoProtDB and GlyGen. " - ], - "score": { - "usability_domain_length": 219 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000336/v-2.1.1", - "usability_domain": [ - "The Mouse (TaxID:10090) Xref GlycoProtDB contains x-refs to IDs present in the GlycoProtDB database (https://acgg.asia/gpdb2/index). If you use this dataset please provide proper attribution to GlycoProtDB and GlyGen. " - ], - "score": { - "usability_domain_length": 219 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000055/1.0.36", - "usability_domain": [ - "This file contains a list of human [taxid:9606] genes with cancer mutation data from dbSNP and EBI. This file contains custom human genes with associated cancer mutation data derived from germline and somatic mutation." - ], - "score": { - "usability_domain_length": 218 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000017/1.0.25", - "usability_domain": [ - "This file contains a list of human [taxid:9606] genes with cancer mutation data from dbSNP and EBI. This file contains custom human genes with associated cancer mutation data derived from germline and somatic mutation." - ], - "score": { - "usability_domain_length": 218 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000047/1.1", - "usability_domain": [ - "The Argos Biosample Metadata master data file sheet is a collection of metrics for NCBI Biosamples associated with the database for Reference Grade microbial Sequences (FDA ARGOS). This sheet was manually populated." - ], - "score": { - "usability_domain_length": 215 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000036/1.0.25", - "usability_domain": [ - "This table may be used as a list of candidate marker genes in cancer cells. This table features the top ten differentially expressed genes in the cells of a cancer type, determined with a Wilcoxon rank-sum test." - ], - "score": { - "usability_domain_length": 211 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000552/v-2.0.2", - "usability_domain": [ - "The dataset provides cross-references to GlyTouCan for the for GlyGen Mapper tool. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" - ], - "score": { - "usability_domain_length": 205 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000552/v-2.1.1", - "usability_domain": [ - "The dataset provides cross-references to GlyTouCan for the for GlyGen Mapper tool. The listed GlyTouCan Accessions are derived from the GlyTouCan accession master list (https://data.glygen.org/GLYDS000281)" - ], - "score": { - "usability_domain_length": 205 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000718/v-2.1.1", - "usability_domain": [ - "The Human Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Human Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000332." - ], - "score": { - "usability_domain_length": 205 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000719/v-2.1.1", - "usability_domain": [ - "The Mouse Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Mouse Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000333." - ], - "score": { - "usability_domain_length": 205 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000720/v-2.1.1", - "usability_domain": [ - "The Rat Glycan Interaction data Citations (MatrixDB)] contains details of the publication present in the dataset The Rat Glycan Interaction data Citations (MatrixDB) https://data.glygen.org/GLY_000334." - ], - "score": { - "usability_domain_length": 201 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000696/v-2.0.2", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 198 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000696/v-2.1.1", - "usability_domain": [ - "The Mouse O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Mouse O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 198 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000040/1.0.36", - "usability_domain": [ - "The neoepitope dataset displays data on neoepitope peptides that arise from tumor-specific mutations. It catalogs experimental data on epitopes studied in humans in the context of various cancers." - ], - "score": { - "usability_domain_length": 196 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000034/1.0.25", - "usability_domain": [ - "The neoepitope dataset displays data on neoepitope peptides that arise from tumor-specific mutations. It catalogs experimental data on epitopes studied in humans in the context of various cancers." - ], - "score": { - "usability_domain_length": 196 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000700/v-2.0.2", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information for fruitfly [taxid:7227] proteins. " - ], - "score": { - "usability_domain_length": 195 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000700/v-2.1.1", - "usability_domain": [ - "This dataset provides the Protein Resource Ontology (PRO) identifiers associated with the proteoforms generated based on the protein, site, glycan information for fruitfly [taxid:7227] proteins. " - ], - "score": { - "usability_domain_length": 195 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000697/v-2.0.2", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 194 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000090/1.0.36", - "usability_domain": [ - "A0390 is a panel of identified gene risk biomarkers (mutations) in ovarian cancer. The genes are BRCA1 (UPKB:P38398), BRCA2 (UPKB:P51587). This panel is curated from the FDA Approved biomarkers." - ], - "score": { - "usability_domain_length": 194 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000697/v-2.1.1", - "usability_domain": [ - "The Rat O-GlcNAc Glycosylation Citations (MCW) dataset contains details of the publication present in the dataset Rat O-GlcNAc Glycosylation Sites (MCW) contributed by Stephanie Olivier's group." - ], - "score": { - "usability_domain_length": 194 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000539/v-2.0.2", - "usability_domain": [ - "The datasets provides a list of GlyTouCan accessions which are supported by GNOme ontology (https://gnome.glyomics.org/) (https://gnome.glyomics.org/restrictions/GlyGen.StructureBrowser.html). " - ], - "score": { - "usability_domain_length": 193 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000539/v-2.1.1", - "usability_domain": [ - "The datasets provides a list of GlyTouCan accessions which are supported by GNOme ontology (https://gnome.glyomics.org/) (https://gnome.glyomics.org/restrictions/GlyGen.StructureBrowser.html). " - ], - "score": { - "usability_domain_length": 193 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000482/v-2.0.2", - "usability_domain": [ - "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in dataset https://data.glygen.org/GLYDS000479" - ], - "score": { - "usability_domain_length": 187 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000482/v-2.1.1", - "usability_domain": [ - "Citation details (such as title, journal_name, publication_date, authors) corresponding to the Digital Object Identifiers (DOI)/PMID present in dataset https://data.glygen.org/GLYDS000479" - ], - "score": { - "usability_domain_length": 187 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_031929/1.0", - "usability_domain": [ - "Generation of paired-end synthetic reads from 4 virus sequences: Human Adenovirus (NC_001405.1), West Nile virus (NC_001563), Hepatitis B virus (NC_003977), and Lassa virus (NC_004297)." - ], - "score": { - "usability_domain_length": 186 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000083/1.0.36", - "usability_domain": [ - "Human [taxid:9606] miRNA file complete dataset corresponding to the current BioXpress version. All differentially expressed miRNAs in all cancers (from matched tumor-normal samples)." - ], - "score": { - "usability_domain_length": 182 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000054/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] mutations in cancer from TCGA and ICGC mapped to RefSeq nucleotide coordinates, UniProtKB amino acid coordinates, and Disease Ontology disease terms" - ], - "score": { - "usability_domain_length": 175 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000018/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] mutations in cancer from TCGA and ICGC mapped to RefSeq nucleotide coordinates, UniProtKB amino acid coordinates, and Disease Ontology disease terms" - ], - "score": { - "usability_domain_length": 175 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000309/v-2.0.2", - "usability_domain": [ - "The dataset lists the synthesized glycans which are identified by the the GlyTouCan accessions. Source:- The Boons Group. https://www.ccrc.uga.edu/~gjboons/boons/Home.htm()" - ], - "score": { - "usability_domain_length": 172 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000060/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] functional elements by cancer and race. This file contains functional elements for five different cancers and each cancer is categorized by race." - ], - "score": { - "usability_domain_length": 172 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000309/v-2.1.1", - "usability_domain": [ - "The dataset lists the synthesized glycans which are identified by the the GlyTouCan accessions. Source:- The Boons Group. https://www.ccrc.uga.edu/~gjboons/boons/Home.htm()" - ], - "score": { - "usability_domain_length": 172 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000732/v-2.1.1", - "usability_domain": [ - "The dataset contains publication information of the PMIDs present in the Human Glycosylation Sites UniCarbKB Glycomics Study dataset (https://data.glygen.org/GLY_000611)." - ], - "score": { - "usability_domain_length": 170 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000727/v-2.1.1", - "usability_domain": [ - "Human Protein Biomarkers Citations dataset contains details of the publication present in the dataset Human Protein Cancer Biomarkers https://data.glygen.org/GLY_000625" - ], - "score": { - "usability_domain_length": 168 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000734/v-2.1.1", - "usability_domain": [ - "The dataset contains publication information of the PMIDs present in the Rat Glycosylation Sites UniCarbKB Glycomics Study dataset (https://data.glygen.org/GLY_000733)." - ], - "score": { - "usability_domain_length": 168 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000053/1.0.36", - "usability_domain": [ - "Clinical data from normal samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." - ], - "score": { - "usability_domain_length": 167 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000038/1.0.25", - "usability_domain": [ - "Clinical data from normal samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." - ], - "score": { - "usability_domain_length": 167 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000037/1.0.25", - "usability_domain": [ - "Clinical data from cancer samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." - ], - "score": { - "usability_domain_length": 167 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000041/1.0.36", - "usability_domain": [ - "Clinical data from cancer samples from TCGA datasets that were corresponding to the data used for differential gene expression analysis (Meta data for BioXpress V4.0)." - ], - "score": { - "usability_domain_length": 167 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000739/v-2.1.1", - "usability_domain": [ - "The Human Germline Mutation Citations dataset contains details of the publications present in the Human Germline Mutation dataset (https://data.glygen.org/GLY_000459)." - ], - "score": { - "usability_domain_length": 167 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000722/v-2.1.1", - "usability_domain": [ - "The Human Literature Mutations Citations contains details of the publication present in the dataset Human Literature Mutations https://data.glygen.org/GLY_000463." - ], - "score": { - "usability_domain_length": 162 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000080/1.0.36", - "usability_domain": [ - "Differential Expression Glycosyltransferases in Human [taxid:9606] Cancer. These enzymes are mapped with disease ontology [DOID] retrieved from TCGA cancers. " - ], - "score": { - "usability_domain_length": 158 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000738/v-2.1.1", - "usability_domain": [ - "The Glycan Biomarkers Citations dataset contains details of the publications present in the Glycan Biomarkers dataset (https://data.glygen.org/GLY_000737)." - ], - "score": { - "usability_domain_length": 155 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000721/v-2.1.1", - "usability_domain": [ - "The Human Somatic Mutation Citations contains details of the publication present in the dataset Human Somatic Mutation https://data.glygen.org/GLY_000462." - ], - "score": { - "usability_domain_length": 154 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000025/1.0.25", - "usability_domain": [ - "List of mouse [taxid:10090] genes with normal RNA-Seq and Affymetrix expression data from Bgee for anatomical entities associated with cancer." - ], - "score": { - "usability_domain_length": 142 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000555/v-2.0.2", - "usability_domain": [ - "The file includes glycan annotation (GlyTouCan ac and ChEBI Id) for the protein + site included in UniProtKB(https://uniprot.org/) database. " - ], - "score": { - "usability_domain_length": 141 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000023/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] genes with normal RNA-Seq and Affymetrix expression data from Bgee for anatomical entities associated with cancer." - ], - "score": { - "usability_domain_length": 141 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000555/v-2.1.1", - "usability_domain": [ - "The file includes glycan annotation (GlyTouCan ac and ChEBI Id) for the protein + site included in UniProtKB(https://uniprot.org/) database. " - ], - "score": { - "usability_domain_length": 141 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000694/v-2.0.2", - "usability_domain": [ - "Mouse Xref The O-GlcNAc Database contains mouse (taxid:10090) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 140 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000694/v-2.1.1", - "usability_domain": [ - "Mouse Xref The O-GlcNAc Database contains mouse (taxid:10090) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 140 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000050/1.0", - "usability_domain": [ - "Lineage assignment for an isolate of M. tuberculosis[taxonomy:1773] based on its single nucleotide polymorphism [so:0000694] (SNP) profile." - ], - "score": { - "usability_domain_length": 139 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000553/v-2.0.2", - "usability_domain": [ - "Human Xref The O-GlcNAc Database contains human (taxid:9606) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 139 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000553/v-2.1.1", - "usability_domain": [ - "Human Xref The O-GlcNAc Database contains human (taxid:9606) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 139 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000731/v-2.1.1", - "usability_domain": [ - "SARS-CoV2 UniProtKB xref GlyConnect dataset contains sarscov2 (taxid: 2697049) UniProtKB canonical accessions mapped to the GlyConnect IDs." - ], - "score": { - "usability_domain_length": 139 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000695/v-2.0.2", - "usability_domain": [ - "Rat Xref The O-GlcNAc Database contains rat (taxid:10116) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 136 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000695/v-2.1.1", - "usability_domain": [ - "Rat Xref The O-GlcNAc Database contains rat (taxid:10116) UniProtKB canonical accessions mapped to the The O-GlcNAc Database accessions." - ], - "score": { - "usability_domain_length": 136 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000529/v-2.0.2", - "usability_domain": [ - "The file contain disease data associated to protein/gene downloaded from GlyCosmos database. [https://glycosmos.org/homes/download]. " - ], - "score": { - "usability_domain_length": 133 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000529/v-2.1.1", - "usability_domain": [ - "The file contain disease data associated to protein/gene downloaded from GlyCosmos database. [https://glycosmos.org/homes/download]. " - ], - "score": { - "usability_domain_length": 133 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000724/v-2.1.1", - "usability_domain": [ - "Mouse Xref O-GlcNAcAtlas dataset contains mouse (taxid: 10090) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." - ], - "score": { - "usability_domain_length": 133 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000723/v-2.1.1", - "usability_domain": [ - "Human Xref O-GlcNAcAtlas dataset contains human (taxid: 9606) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." - ], - "score": { - "usability_domain_length": 132 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000725/v-2.1.1", - "usability_domain": [ - "Rat Xref O-GlcNAcAtlas dataset contains rat (taxid: 10116) UniProtKB canonical accessions mapped to the O-GlcNAcAtlas accessions." - ], - "score": { - "usability_domain_length": 129 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000729/v-2.1.1", - "usability_domain": [ - "Mouse UniProtKB xref GlyConnect dataset contains mouse(taxid: 10090) UniProtKB canonical accessions mapped to the GlyConnect IDs." - ], - "score": { - "usability_domain_length": 129 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000728/v-2.1.1", - "usability_domain": [ - "Human UniProtKB xref GlyConnect dataset contains human(taxid: 9606) UniProtKB canonical accessions mapped to the GlyConnect IDs." - ], - "score": { - "usability_domain_length": 128 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000538/v-2.0.2", - "usability_domain": [ - "The dataset provides citations for the associated glycan (GlyTouCan Accession) from dataset http://data.glygen.org/GLY_000528. " - ], - "score": { - "usability_domain_length": 127 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000587/v-2.0.2", - "usability_domain": [ - "The dataset provides all glycans (identified by GlyTouCan accession) from GlyTouCan. Source database: https://glytoucan.org/. " - ], - "score": { - "usability_domain_length": 126 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000587/v-2.1.1", - "usability_domain": [ - "The dataset provides all glycans (identified by GlyTouCan accession) from GlyTouCan. Source database: https://glytoucan.org/. " - ], - "score": { - "usability_domain_length": 126 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000730/v-2.1.1", - "usability_domain": [ - "Rat UniProtKB xref GlyConnect dataset contains rat(taxid: 10116) UniProtKB canonical accessions mapped to the GlyConnect IDs." - ], - "score": { - "usability_domain_length": 125 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000045/1.0.36", - "usability_domain": [ - "This dataset contains public cancer biomarkers retrieved from the Early Detection Research Network (EDRN)." - ], - "score": { - "usability_domain_length": 106 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000019/1.0.25", - "usability_domain": [ - "This dataset contains public cancer biomarkers retrieved from the Early Detection Research Network (EDRN)." - ], - "score": { - "usability_domain_length": 106 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000031/1.0.25", - "usability_domain": [ - "List of one to one orthologous Ensembl gene identifiers and HGNC/MGI gene symbols for humans and mice." - ], - "score": { - "usability_domain_length": 102 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000004/0.0", - "usability_domain": [ - "A workflow to assemble raw COVID19 virus Illumina sequencing reads, using the SPADES assembler." - ], - "score": { - "usability_domain_length": 95 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000595/v-2.0.2", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000594/v-2.0.2", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000596/v-2.0.2", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000594/v-2.1.1", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000595/v-2.1.1", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000596/v-2.1.1", - "usability_domain": [ - "", - "REACTOME is an open-source, open access, manually curated and peer-reviewed pathway database. " - ], - "score": { - "usability_domain_length": 94 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000240/1.0", - "usability_domain": [ - "Prediction of research topic of publication based on topic of cited papers on citation graph" - ], - "score": { - "usability_domain_length": 92 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000011/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] mRNAs with literature evidence of expression in lung cancer" - ], - "score": { - "usability_domain_length": 86 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000052/1.0.36", - "usability_domain": [ - "List of human [taxid:9606] genes with literature evidence of mutation in all cancers." - ], - "score": { - "usability_domain_length": 85 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000007/1.0", - "usability_domain": [ - "", - "Workflow metagenomic annotation for sequencing reads and phylogenetic tree creation." - ], - "score": { - "usability_domain_length": 84 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000020/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] genes with literature evidence of mutation in all cancers" - ], - "score": { - "usability_domain_length": 84 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000014/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] genes with literature evidence of mutation in lung cancer" - ], - "score": { - "usability_domain_length": 84 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000010/1.0.25", - "usability_domain": [ - "List of human [taxid:9606] miRNAs with literature evidence of expression in cancer" - ], - "score": { - "usability_domain_length": 82 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000006/1.0", - "usability_domain": [ - "Processing NGS exome sequencing reads. alignment, gene assembly, variant calling" - ], - "score": { - "usability_domain_length": 80 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000527/v-2.0.2", - "usability_domain": [ - "The dataset includes GlyTouCan accessions included in Sandbox application." - ], - "score": { - "usability_domain_length": 74 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000527/v-2.1.1", - "usability_domain": [ - "The dataset includes GlyTouCan accessions included in Sandbox application." - ], - "score": { - "usability_domain_length": 74 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000410/1.0", - "usability_domain": [ - "Test BCO for sample of OncoMX data for the biomarker-partnership." - ], - "score": { - "usability_domain_length": 65 - } - }, - { - "object_id": "https://biocomputeobject.org/ARGOS_000004/1.4", - "usability_domain": [ - "SARS-CoV-2 (Wuhan-Hu-1) reference protein coding sequences." - ], - "score": { - "usability_domain_length": 59 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000476/v-2.0.2", - "usability_domain": [ - "The dataset provides rdf fomat file for the glycan data. " - ], - "score": { - "usability_domain_length": 57 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000476/v-2.1.1", - "usability_domain": [ - "The dataset provides rdf fomat file for the glycan data. " - ], - "score": { - "usability_domain_length": 57 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000005/0.0", - "usability_domain": [ - "Comparison of COVID19 assembled genomes for variants" - ], - "score": { - "usability_domain_length": 52 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000082/1.0.36", - "usability_domain": [ - "Human Cancer Glycosyltranferases retrieved from TCGA" - ], - "score": { - "usability_domain_length": 52 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000008/1.0", - "usability_domain": [ - "Peak finder for CHIPseq read data" - ], - "score": { - "usability_domain_length": 33 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000016/3.0", - "usability_domain": [ - "This is the workflow annotation" - ], - "score": { - "usability_domain_length": 31 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000016/3.1", - "usability_domain": [ - "This is the workflow annotation" - ], - "score": { - "usability_domain_length": 31 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000600/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000601/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000606/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000603/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000604/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000605/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000602/v-2.0.2", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000601/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000602/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000603/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000604/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000605/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000606/v-2.1.1", - "usability_domain": [ - "-" - ], - "score": { - "usability_domain_length": 1 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000001/1.0", - "usability_domain": [ - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000607/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000522/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000565/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000611/v-2.0.2", - "usability_domain": [ - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000608/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000509/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000592/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000549/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000550/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000562/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000619/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000546/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000575/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000548/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000617/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000551/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000519/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000563/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000618/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000547/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000561/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000507/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000564/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000610/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000508/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000593/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000540/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000609/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000572/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000577/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000545/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000679/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000526/v-2.0.2", - "usability_domain": [ - "", - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000627/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000566/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000521/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000542/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000591/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000570/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000569/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000612/v-2.0.2", - "usability_domain": [ - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000588/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000520/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000567/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000543/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000571/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000590/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000589/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000568/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000677/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000678/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000699/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000544/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000560/v-2.0.2", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000316/3.0", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/OMX_000093/1.0.36", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000507/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000508/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000509/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000519/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000520/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000521/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000522/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000526/v-2.1.1", - "usability_domain": [ - "", - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000540/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000542/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000543/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000544/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000545/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000546/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000547/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000548/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000549/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000550/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000551/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000560/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000561/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000562/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000563/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000564/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000565/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000566/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000567/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000568/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000569/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000570/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000571/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000572/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000575/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000577/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000588/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000589/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000590/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000591/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000592/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000593/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000607/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000608/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000610/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000611/v-2.1.1", - "usability_domain": [ - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000612/v-2.1.1", - "usability_domain": [ - "" - ], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000617/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000618/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000619/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000627/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000677/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000678/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000679/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000699/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000609/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GLY_000733/v-2.1.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/GALXY_000027/0.0", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000476/3.0", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000478/3.0", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000480/3.1", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - }, - { - "object_id": "https://biocomputeobject.org/BCO_000482/3.0", - "usability_domain": [], - "score": { - "usability_domain_length": 0 - } - } -] \ No newline at end of file diff --git a/docs/deployment/productionDeployment.md b/docs/deployment/productionDeployment.md index e69de29..4898768 100644 --- a/docs/deployment/productionDeployment.md +++ b/docs/deployment/productionDeployment.md @@ -0,0 +1,31 @@ +# BCODB Production Update Deployment + +**for instructions on deploying a NEW production instance see [newProductionInstance.md](/docs/newProductionInstance.md) + +## Login to server and navigate to project root + +Login example: +``` +sh USER_NAME@test.portal.biochemistry.gwu.edu +cd /var/www/bcoeditor/bco_api +``` + +## Switch to and pull desired barnch +``` +git fetch --all +git switch [DESIRED BRANCH] +``` +## Update any configurations or settings required by the new version +- version in .secrets, etc. + +## Restart the service +``` +sudo systemctl restart bco_api +``` +## Make the serive is running +``` +sudo systemctl status bco_api +``` + +## Check that cahnges are live +Navigate to `PROJECT-URL/api/docs/`. You should see the version value you entered in the `.secrets` file displayed on the Swagger page. \ No newline at end of file diff --git a/docs/newProductionInstance.md b/docs/newProductionInstance.md new file mode 100644 index 0000000..d715f5b --- /dev/null +++ b/docs/newProductionInstance.md @@ -0,0 +1 @@ +# BCODB New Production Deployment \ No newline at end of file From 34a61c61f49105bdc9355c08b3deee4ec692ba48 Mon Sep 17 00:00:00 2001 From: Kiran Sen <166645524+Kirans0615@users.noreply.github.com> Date: Thu, 18 Jul 2024 11:53:27 -0400 Subject: [PATCH 3/4] Update config.md --- docs/config.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/config.md b/docs/config.md index 2704dc5..792f10b 100644 --- a/docs/config.md +++ b/docs/config.md @@ -9,6 +9,7 @@ SECRET_KEY=^2uql114+yy0d$xv6+lm8*#1=uxs_oa0zw0bvu^fpi4tc9x0i ANON_KEY=627626823549f787c3ec763ff687169206626149 [SERVER] +PRODUCTION=False DEBUG=True ALLOWED_HOSTS=* SERVER_VERSION=24.06.13 @@ -64,4 +65,4 @@ This value is used as the `"NAME"`in Django's [DATABASES](https://docs.djangopro ### EMAIL_BACKEND Specifies which of Django's [EMAIL_BACKEND](https://docs.djangoproject.com/en/5.0/topics/email/#topic-email-backends) classes to use. -This app has been tested using the `django.core.mail.backends.smtp.EmailBackend` with `sendmail` and a GMail account in production, and with `django.core.mail.backends.console.EmailBackend` in local deployments. \ No newline at end of file +This app has been tested using the `django.core.mail.backends.smtp.EmailBackend` with `sendmail` and a GMail account in production, and with `django.core.mail.backends.console.EmailBackend` in local deployments. From 945744a2b92e721fa11478b1264d248724e5242c Mon Sep 17 00:00:00 2001 From: hadleyking Date: Mon, 22 Jul 2024 15:20:51 -0400 Subject: [PATCH 4/4] Add API endpoint for basic comaprison Changes to be committed: modified: biocompute/apis.py modified: biocompute/urls.py modified: requirements.txt --- biocompute/apis.py | 88 ++++++++++++++++++++++++++++++++++++++++++++-- biocompute/urls.py | 2 ++ requirements.txt | 13 +++++-- 3 files changed, 98 insertions(+), 5 deletions(-) diff --git a/biocompute/apis.py b/biocompute/apis.py index 7a26328..8b66fc2 100644 --- a/biocompute/apis.py +++ b/biocompute/apis.py @@ -25,6 +25,7 @@ bulk_response_constructor, response_status, ) +from deepdiff import DeepDiff from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from django.conf import settings @@ -34,7 +35,7 @@ from rest_framework.views import APIView from rest_framework.permissions import IsAuthenticated, AllowAny from rest_framework.response import Response -from tests.fixtures.testing_bcos import BCO_000001_DRAFT +from tests.fixtures.testing_bcos import BCO_000001_DRAFT, BCO_000000_DRAFT hostname = settings.PUBLIC_HOSTNAME BASE_DIR = settings.BASE_DIR @@ -198,7 +199,7 @@ class PublishBcoApi(APIView): """ permission_classes = [IsAuthenticated] - # swagger_schema = None + swagger_schema = None #TODO: Add Swaggar docs # schema = jsonref.load_uri( # f"file://{BASE_DIR}/config/IEEE/2791object.json" @@ -860,3 +861,86 @@ def get(self, request, bco_accession, bco_version): bco_counter_increment(bco_instance) return Response(status=status.HTTP_200_OK, data=bco_instance.contents) + +class CompareBcoApi(APIView): + """Bulk Compare BCOs [Bulk Enabled] + + -------------------- + + Bulk operation to compare BCOs. + + ```JSON + [ + {...BCO CONTENTS...}, + {...BCO CONTENTS...} + ] + + """ + + authentication_classes = [] + permission_classes = [AllowAny] + + @swagger_auto_schema( + operation_id="api_bco_compare", + request_body=openapi.Schema( + type=openapi.TYPE_ARRAY, + title="Bulk Compare BCOs", + items=openapi.Schema( + type=openapi.TYPE_ARRAY, + example=[BCO_000000_DRAFT, BCO_000001_DRAFT], + items=openapi.Schema( + type=openapi.TYPE_OBJECT, + required=["contents"], + description="Contents of the BCO.", + ) + ), + description="Compare one BCO against another.", + ), + responses={ + 200: "All BCO comparisons are successful.", + 207: "Some or all BCO comparisons failed. Each object submitted" + " will have it's own response object with it's own status" + " message:\n", + 400: "Bad request." + }, + tags=["BCO Management"], + ) + def post(self, request): + validator = BcoValidator() + response_data = [] + rejected_requests = False + accepted_requests = True + data = request.data + + for index, comparison in enumerate(data): + new_bco, old_bco = comparison + identifier = new_bco["object_id"]+ " vs " + old_bco["object_id"] + + # new_results = validator.parse_and_validate(bco=new_bco) + # old_results = validator.parse_and_validate(bco=old_bco) + # import pdb; pdb.set_trace() + # new_identifier, new_results = new_results.popitem() + # old_identifier, old_results = bco_results.popitem() + + # if results["number_of_errors"] > 0: + # rejected_requests = True + # bco_status = "FAILED" + # status_code = 400 + # message = "BCO not valid" + + # else: + # accepted_requests = True + # bco_status = "SUCCESS" + # status_code = 200 + # message = "BCO valid" + + response_data.append(bulk_response_constructor( + identifier = identifier, + status="SUCCESS", + code=200, + # message=message, + data=DeepDiff(new_bco, old_bco).to_json() + )) + + status_code = response_status(accepted_requests, rejected_requests) + return Response(status=status_code, data=response_data) \ No newline at end of file diff --git a/biocompute/urls.py b/biocompute/urls.py index 8e21d66..7ce171c 100644 --- a/biocompute/urls.py +++ b/biocompute/urls.py @@ -9,6 +9,7 @@ DraftsPublishApi, PublishBcoApi, ValidateBcoApi, + CompareBcoApi, ) urlpatterns = [ @@ -17,4 +18,5 @@ path("objects/drafts/publish/", DraftsPublishApi.as_view()), path("objects/validate/", ValidateBcoApi.as_view()), path("objects/publish/", PublishBcoApi.as_view()), + path("objects/compare/", CompareBcoApi.as_view()), ] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4fd77ca..ee834db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ appdirs==1.4.3 asgiref==3.3.4 astroid==2.9.3 -attrs==20.3.0 +attrs==23.2.0 black==22.6.0 CacheControl==0.12.6 cachetools==5.3.0 @@ -11,18 +11,20 @@ chardet==3.0.4 charset-normalizer==2.0.7 click==8.1.3 colorama==0.4.3 +configparser==5.3.0 contextlib2==0.6.0 coreapi==2.3.3 coreschema==0.0.4 coverage==6.3.2 cryptography==39.0.0 +deepdiff==7.0.1 distlib==0.3.0 distro==1.4.0 Django==3.2.13 django-cors-headers==3.7.0 -django-guardian==2.3.0 django-reset-migrations==0.4.0 django-rest-framework==0.1.0 +django-rest-passwordreset==1.3.0 django-rest-swagger==2.2.0 djangorestframework==3.12.2 djangorestframework-api-key==2.0.0 @@ -42,7 +44,8 @@ isort==5.10.1 itypes==1.2.0 Jinja2==3.0.1 jsonref==0.2 -jsonschema==3.2.0 +jsonschema==4.20.0 +jsonschema-specifications==2023.12.1 lazy-object-proxy==1.7.1 lockfile==0.12.2 MarkupSafe==2.0.1 @@ -50,12 +53,14 @@ mccabe==0.6.1 msgpack==0.6.2 mypy-extensions==0.4.3 openapi-codec==1.3.2 +ordered-set==4.1.0 packaging==20.3 pathspec==0.9.0 pep517==0.8.2 platformdirs==2.5.1 pluggy==1.2.0 progress==1.5 +psycopg2==2.9.5 pyasn1==0.4.8 pyasn1-modules==0.2.8 pycparser==2.21 @@ -71,8 +76,10 @@ python-dateutil==2.8.1 pytoml==0.1.21 pytz==2020.4 PyYAML==6.0 +referencing==0.32.0 requests==2.26.0 retrying==1.3.3 +rpds-py==0.16.2 rsa==4.9 ruamel.yaml==0.17.16 ruamel.yaml.clib==0.2.6