From 3542b10730a336e67b02d1eb6efc26c9b94b33c4 Mon Sep 17 00:00:00 2001 From: Chris Mungall Date: Mon, 29 Jul 2024 17:25:08 -0700 Subject: [PATCH] Fixing test 121 to be valid. (#143) * Fixing test 121 to be valid. Fixes #141 * fixed typo --- tests/input/mixs6_core_test.tsv | 22 +++++++++++----------- tests/test_121/input/schema_def.tsv | 4 ++-- tests/test_121/test_mixs_generation.py | 8 -------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/tests/input/mixs6_core_test.tsv b/tests/input/mixs6_core_test.tsv index be11c8b..058e8ef 100644 --- a/tests/input/mixs6_core_test.tsv +++ b/tests/input/mixs6_core_test.tsv @@ -2,24 +2,24 @@ Structured comment name Item (rdfs:label) Action carried out and logged by CIH i > slot title comments comments comments ignore notes ignore see_also description ignore pattern ignore in_subset cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality cardinality annotations multivalued annotations slot_uri exact_mapping > "prefix: ""review completed """ "prefix: ""reviewer: """ "applies_to_class: ""MIGS eukaryote""" "applies_to_class: ""MIGS bacteria""" "applies_to_class: ""MIGS plant""" "applies_to_class: ""MIGS virus""" "applies_to_class: ""MIGS org""" "applies_to_class: ""MIMS""" "applies_to_class: ""MIMARKS specimen""" "applies_to_class: ""MIMARKS survey""" "applies_to_class: ""MISAG""" "applies_to_class: ""MIMAG""" "applies_to_class: ""MIUVIG""" tag: preferred_unit vmap: {1: False, 2: True} curie_prefix: MIGS submitted_to_insdc submitted to insdc deprecated term remove X fine as is; this term is not used for submission, LS: suggest removing from MIxS core, NCBI - OK to remove CIH MIXS core https://github.com/GenomicsStandardsConsortium/mixs/issues/60 Depending on the study (large-scale e.g. done with next generation sequencing technology, or small-scale) sequences have to be submitted to SRA (Sequence Read Archive), DRA (DDBJ Read Archive) or via the classical Webin/Sequin systems to Genbank, ENA and DDBJ. Although this field is mandatory, it is meant as a self-test field, therefore it is not necessary to include this field in contextual data submitted to databases boolean {boolean} yes investigation M M M M M M M M M M M 1 1 MIXS:0000004 MIGS-1 -investigation_type investigation type deprecated term remove X query definition; this term is not used for submission, LS: suggest removing from MIxS core; NCBI - OK to remove CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/60 Nucleic Acid Sequence Report is the root element of all MIGS/MIMS compliant reports as standardized by Genomic Standards Consortium. This field is either eukaryote,bacteria,virus,plasmid,organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genome eukaryote, bacteria_archaea, plasmid, virus, organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genomes [eukaryote|bacteria_archaea|plasmid|virus|organelle|metagenome|metatranscriptome|mimarks-survey|mimarks-specimen|misag|mimag|miuvig] metagenome investigation M M M M M M M M M M M 1 2 MIXS:0000007 MIGS-2 -samp_name sample name updated description add to the core NEW core term - Approved in Sept CIG call LS MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/78 A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name. text {text} ISDsoil1 investigation M M M M M M M M M M M 1 MIXS:0001107 +investigation_type investigation type deprecated term remove X query definition; this term is not used for submission, LS: suggest removing from MIxS core; NCBI - OK to remove CIH MIXS core (mandatory) https://github.com/GenomicsStandardsConsortium/mixs/issues/60 Nucleic Acid Sequence Report is the root element of all MIGS/MIMS compliant reports as standardized by Genomic Standards Consortium. This field is either eukaryote,bacteria,virus,plasmid,organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genome eukaryote, bacteria_archaea, plasmid, virus, organelle, metagenome,mimarks-survey, mimarks-specimen, metatranscriptome, single amplified genome, metagenome-assembled genome, or uncultivated viral genomes [eukaryote|bacteria_archaea|plasmid|virus|organelle|metagenome|metatranscriptome|mimarks-survey|mimarks-specimen|misag|mimag|miuvig] metagenome investigation M M M M M M M M M M M 1 2 MIXS:0000007 MIGS-2 +samp_name sample name updated description add to the core NEW core term - Approved in Sept CIG call LS MIXS core (mandatory) https://github.com/GenomicsStandardsConsortium/mixs/issues/78 A local identifier or name that for the material sample used for extracting nucleic acids, and subsequent sequencing. It can refer either to the original material collected or to any derived sub-samples. It can have any format, but we suggest that you make it concise, unique and consistent within your lab, and as informative as possible. INSDC requires every sample name from a single Submitter to be unique. Use of a globally unique identifier for the field source_mat_id is recommended in addition to sample_name. text {text} ISDsoil1 investigation M M M M M M M M M M M 1 MIXS:0001107 samp_taxon_id Taxonomy ID of DNA sample suggested as part of +/-ive controls work, but would actually be an appropriate addition to all checklists so adding it to the core X #205 NCBI taxon id of the sample. Maybe be a single taxon or mixed taxa sample. Use 'synthetic metagenome’ for mock community/positive controls, or 'blank sample' for negative controls. Taxonomy ID {text} [NCBI:txid] Gut Metagenome [NCBI:txid749906] investigation M M M M M M M M M M M 1 MIXS:0001320 -project_name project name no change made X fine as is CIH MIXS core (manditory) Name of the project within which the sequencing was organized {text} Forest soil metagenome investigation M M M M M M M M M M M 1 3 MIXS:0000092 MIGS-3 +project_name project name no change made X fine as is CIH MIXS core (mandatory) Name of the project within which the sequencing was organized {text} Forest soil metagenome investigation M M M M M M M M M M M 1 3 MIXS:0000092 MIGS-3 experimental_factor experimental factor no change made X fine as is CIH MIXS core (optional) Experimental factors are essentially the variable aspects of an experiment design which can be used to describe an experiment, or set of experiments, in an increasingly detailed manner. This field accepts ontology terms from Experimental Factor Ontology (EFO) and/or Ontology for Biomedical Investigations (OBI). For a browser of EFO (v 2.95) terms, please see http://purl.bioontology.org/ontology/EFO; for a browser of OBI (v 2018-02-12) terms please see http://purl.bioontology.org/ontology/OBI text or EFO and/or OBI {termLabel} {[termID]}|{text} time series design [EFO:EFO_0001779] investigation X X X X X C C X C C C 1 4 MIXS:0000008 -lat_lon geographic location (latitude and longitude) no change made Decision: keep the name of the field as is. - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: consensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. X Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS] CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/62 The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system decimal degrees, limit to 8 decimal points {float} {float} 50.586825 6.408977 environment M M M M M M M M M M M 1 5 MIXS:0000009 MIGS-4.1|MIGS-4.2 +lat_lon geographic location (latitude and longitude) no change made Decision: keep the name of the field as is. - Issue: 1 field, do we accept range and area; Issue 2: split into two fields - discussion: consensus is to keep it as one field; for NCBI keep as one combined field . Note: INSDC discussion, limit to 8 decimal points. X Should the definition be updated to include defined areas using closed linear ring notation? (A linear ring is a closed LineString with four or more positions). (CIH); Suggest renaming to : latitude and longitude [LS] CIH MIXS core (mandatory) https://github.com/GenomicsStandardsConsortium/mixs/issues/62 The geographical origin of the sample as defined by latitude and longitude. The values should be reported in decimal degrees and in WGS84 system decimal degrees, limit to 8 decimal points {float} {float} 50.586825 6.408977 environment M M M M M M M M M M M 1 5 MIXS:0000009 MIGS-4.1|MIGS-4.2 depth depth Updated Item name and definition CHANGE: Item name to: depth add to core, check for consistency of definitions - DONE X Needs updating CIH MIXS core (optional) https://github.com/GenomicsStandardsConsortium/mixs/issues/63 The vertical distance below local surface, e.g. for sediment or soil samples depth is measured from sediment or soil surface, respectively. Depth can be reported as an interval for subsurface samples. measurement value {float} {unit} 10 meter environment E E E E E E E E E E E 1 6 MIXS:0000018 MIGS-4.3 alt altitude no change made X fine as is -but does it need to be in packages AS WELL? - Not needed in all packages [LS] CIH MIXS core (optional) Altitude is a term used to identify heights of objects such as airplanes, space shuttles, rockets, atmospheric balloons and heights of places such as atmospheric layers and clouds. It is used to measure the height of an object which is above the earth's surface. In this context, the altitude measurement is the vertical distance between the earth's surface above sea level and the sampled position in the air measurement value {float} {unit} 100 meter environment E E E E E E E E E E E 1 MIXS:0000094 MIGS-4.4 elev elevation no change made X fine as is -but does it need to be in packages AS WELL? CIH MIXS core (optional) Elevation of the sampling site is its height above a fixed reference point, most commonly the mean sea level. Elevation is mainly used when referring to points on the earth's surface, while altitude is used for points above the surface, such as an aircraft in flight or a spacecraft in orbit. measurement value {float} {unit} 100 meter environment E E E E E E E E E E E 1 7 MIXS:0000093 temp temperature added to core #71 Temperature of the sample at the time of sampling. measurement value {float} {unit} 25 degree Celsius environment E E E E E E E E E E E degree Celsius 1 40 MIXS:0000113 -geo_loc_name geographic location (country and/or sea,region) updated definition improve definition to address GitHub ticket - https://github.com/GenomicsStandardsConsortium/mixs/issues/17 X there is a query related to this in github CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/79 The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ) country or sea name (INSDC or GAZ): region(GAZ), specific location name {term}: {term}, {text} USA: Maryland, Bethesda environment M M M M M M M M M M M 1 8 MIXS:0000010 MIGS-4 -collection_date collection date no change made X fine as is CIH MIXS core (manditory) The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant date and time {timestamp} 2018-05-11T10:00:00+01:00; 2018-05-11 environment M M M M M M M M M M M 1 9 MIXS:0000011 MIGS-5 +geo_loc_name geographic location (country and/or sea,region) updated definition improve definition to address GitHub ticket - https://github.com/GenomicsStandardsConsortium/mixs/issues/17 X there is a query related to this in github CIH MIXS core (mandatory) https://github.com/GenomicsStandardsConsortium/mixs/issues/79 The geographical origin of the sample as defined by the country or sea name followed by specific region name. Country or sea names should be chosen from the INSDC country list (http://insdc.org/country.html), or the GAZ ontology (http://purl.bioontology.org/ontology/GAZ) country or sea name (INSDC or GAZ): region(GAZ), specific location name {term}: {term}, {text} USA: Maryland, Bethesda environment M M M M M M M M M M M 1 8 MIXS:0000010 MIGS-4 +collection_date collection date no change made X fine as is CIH MIXS core (mandatory) The time of sampling, either as an instance (single point in time) or interval. In case no exact time is available, the date/time can be right truncated i.e. all of these are valid times: 2008-01-23T19:23:10+00:00; 2008-01-23T19:23:10; 2008-01-23; 2008-01; 2008; Except: 2008-01; 2008 all are ISO8601 compliant date and time {timestamp} 2018-05-11T10:00:00+01:00; 2018-05-11 environment M M M M M M M M M M M 1 9 MIXS:0000011 MIGS-5 neg_cont_type negative control type added to core X #199 The substance or equipment used as a negative control in an investigation enumeration or text [distilled water|phosphate buffer|empty collection device|empty collection tube|DNA-free PCR mix|sterile swab |sterile syringe] investigation C C C C C C C C C C C 1 MIXS:0001321 pos_cont_type positive control type added to core new term added as conditional mandatory for all checklists, this was suggested by biosamples in order to help categorise the type of control samples X #200 The substance, mixture, product, or apparatus used to verify that a process which is part of an investigation delivers a true positive. {term} or {text} investigation C C C C C C C C C C C 1 MIXS:0001322 -env_broad_scale broad-scale environmental context updated definition add synonym to end of definition X to do; Synonym: 'biome' [LS] LS MIXS core (manditory) #106 Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS The major environment type(s) where the sample was collected. Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated by one or more pipes. {termLabel} {[termID]} oceanic epipelagic zone biome [ENVO:01000033] for annotating a water sample from the photic zone in middle of the Atlantic Ocean environment M M M M M M M M M M M 1 10 MIXS:0000012 -env_local_scale local environmental context updated definition add synonym to end of definition X to do; Synonym:'feature [LS], habitat LS MIXS core (manditory) #106 Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS. Environmental entities having causal influences upon the entity at time of sampling. {termLabel} {[termID]} litter layer [ENVO:01000338]; Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336]. environment M M M M M M M M M M M 1 11 MIXS:0000013 MIGS-6 (habitat) -env_medium environmental medium updated definition add synonym to end of definition X to do; Synonym: material [LS] LS MIXS core (manditory) #117 & #196 Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top). The material displaced by the entity at time of sampling. Recommend subclasses of environmental material [ENVO:00010483]. {termLabel} {[termID]} soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO_00002005] environment M M M M M M M M M M M 1 12 MIXS:0000014 -env_package environmental package deprecated term agreed, remove from Core X "Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users won't need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]" CIH MIXS core (manditory) https://github.com/GenomicsStandardsConsortium/mixs/issues/64 MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported enumeration [air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water] soil mixs extension C C C C C C C C C C C 1 13 MIXS:0000019 (list MIXS ID for each package: air, ...) +env_broad_scale broad-scale environmental context updated definition add synonym to end of definition X to do; Synonym: 'biome' [LS] LS MIXS core (mandatory) #106 Report the major environmental system the sample or specimen came from. The system(s) identified should have a coarse spatial grain, to provide the general environmental context of where the sampling was done (e.g. in the desert or a rainforest). We recommend using subclasses of EnvO’s biome class: http://purl.obolibrary.org/obo/ENVO_00000428. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS The major environment type(s) where the sample was collected. Recommend subclasses of biome [ENVO:00000428]. Multiple terms can be separated by one or more pipes. {termLabel} {[termID]} oceanic epipelagic zone biome [ENVO:01000033] for annotating a water sample from the photic zone in middle of the Atlantic Ocean environment M M M M M M M M M M M 1 10 MIXS:0000012 +env_local_scale local environmental context updated definition add synonym to end of definition X to do; Synonym:'feature [LS], habitat LS MIXS core (mandatory) #106 Report the entity or entities which are in the sample or specimen’s local vicinity and which you believe have significant causal influences on your sample or specimen. We recommend using EnvO terms which are of smaller spatial grain than your entry for env_broad_scale. Terms, such as anatomical sites, from other OBO Library ontologies which interoperate with EnvO (e.g. UBERON) are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS. Environmental entities having causal influences upon the entity at time of sampling. {termLabel} {[termID]} litter layer [ENVO:01000338]; Annotating a pooled sample taken from various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub layer [ENVO:01000336]. environment M M M M M M M M M M M 1 11 MIXS:0000013 MIGS-6 (habitat) +env_medium environmental medium updated definition add synonym to end of definition X to do; Synonym: material [LS] LS MIXS core (mandatory) #117 & #196 Report the environmental material(s) immediately surrounding the sample or specimen at the time of sampling. We recommend using subclasses of 'environmental material' (http://purl.obolibrary.org/obo/ENVO_00010483). EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS . Terms from other OBO ontologies are permissible as long as they reference mass/volume nouns (e.g. air, water, blood) and not discrete, countable entities (e.g. a tree, a leaf, a table top). The material displaced by the entity at time of sampling. Recommend subclasses of environmental material [ENVO:00010483]. {termLabel} {[termID]} soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO_00002005] environment M M M M M M M M M M M 1 12 MIXS:0000014 +env_package environmental package deprecated term agreed, remove from Core X "Similar to ""Investigation Type"" above, and do we need to make it a hidden field? ie users won't need to manually input the value as its implicit on which package they choose to use.; Agreed, this is not a used field [LS]" CIH MIXS core (mandatory) https://github.com/GenomicsStandardsConsortium/mixs/issues/64 MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported enumeration [air|built environment|host-associated|human-associated|human-skin|human-oral|human-gut|human-vaginal|hydrocarbon resources-cores|hydrocarbon resources-fluids/swabs|microbial mat/biofilm|misc environment|plant-associated|sediment|soil|wastewater/sludge|water] soil mixs extension C C C C C C C C C C C 1 13 MIXS:0000019 (list MIXS ID for each package: air, ...) subspecf_gen_lin subspecific genetic lineage Updated definition Create ticket to unify this term, with this term and package term: host infra-specific name - see which term is used most widely for data, then add note in definition of the synonym fine as is CIH MIXS core (optional) #73 & #256 Information about the genetic distinctness of the sequenced organism below the subspecies level, e.g., serovar, serotype, biotype, ecotype, or any relevant genetic typing schemes like Group I plasmid. Subspecies should not be recorded in this term, but in the NCBI taxonomy. Supply both the lineage name and the lineage rank separated by a colon, e.g., biovar:abc123. Genetic lineage below lowest rank of NCBI taxonomy, which is subspecies, e.g. serovar, biotype, ecotype. {rank name}:{text} serovar:Newport nucleic acid sequence source C C C C C - - C - - - 1 14 MIXS:0000020 ploidy ploidy no change made. - Remove from Core, keep in MIGS Eu, also in MISAG, MIMAG only relevant to MIGS Eu, should we consider removing from Core to a MIGS Eu specific package? CIH MIXS core (optional) The ploidy level of the genome (e.g. allopolyploid, haploid, diploid, triploid, tetraploid). It has implications for the downstream study of duplicated gene and regions of the genomes (and perhaps for difficulties in assembly). For terms, please select terms listed under class ploidy (PATO:001374) of Phenotypic Quality Ontology (PATO), and for a browser of PATO (v 2018-03-27) please refer to http://purl.bioontology.org/ontology/PATO PATO {termLabel} {[termID]} allopolyploidy [PATO:0001379] nucleic acid sequence source X - - - - - - - - - - 1 15 MIXS:0000021 num_replicons number of replicons no change made. MIXS core (optional) Reports the number of replicons in a nuclear genome of eukaryotes, in the genome of a bacterium or archaea or the number of segments in a segmented virus. Always applied to the haploid chromosome count of a eukaryote for eukaryotes and bacteria: chromosomes (haploid count); for viruses: segments {integer} 2 nucleic acid sequence source X M - C - - - - - - - 1 16 MIXS:0000022 @@ -59,7 +59,7 @@ pcr_primers pcr primers no change made. stay in MIxS core to do MIXS core (op mid multiplex identifiers no change made. stay in the core Molecular barcodes, called Multiplex Identifiers (MIDs), that are used to specifically tag unique samples in a sequencing run. Sequence should be reported in uppercase letters multiplex identifier sequence {dna} GTGAATAT sequencing - - - - - C C - C C C 1 47 MIXS:0000047 adapters adapters no change made. stay in the core Adapters provide priming sequences for both amplification and sequencing of the sample-library fragments. Both adapters should be reported; in uppercase letters adapter A and B sequence {dna};{dna} AATGATACGGCGACCACCGAGATCTACACGCT;CAAGCAGAAGACGGCATACGAGAT sequencing C C C C C C C - C C C 1 48 MIXS:0000048 pcr_cond pcr conditions no change made. stay in the core fine as is CIH Description of reaction conditions and components of PCR in the form of 'initial denaturation:94degC_1.5min; annealing=...' initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles initial denaturation:degrees_minutes;annealing:degrees_minutes;elongation:degrees_minutes;final elongation:degrees_minutes;total cycles initial denaturation:94_3;annealing:50_1;elongation:72_1.5;final elongation:72_10;35 sequencing - - - - - - C C - - - 1 49 MIXS:0000049 -seq_meth sequencing method Updated definition recommendation - look at OBI for the allowable terms , add to definition for options to look at OBI. For now, we are using the ENA approved checklist at https://ena-docs.readthedocs.io/en/latest/submit/reads/webin-cli.html#metadata-validation. update definition see #96 MIXS core (manditory) #96 Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103). Text or OBI {termLabel} {[termID]}|{text} 454 Genome Sequencer FLX [OBI:0000702] sequencing M M M M M M M M M M M 1 50 MIXS:0000050 MIGS-29 +seq_meth sequencing method Updated definition recommendation - look at OBI for the allowable terms , add to definition for options to look at OBI. For now, we are using the ENA approved checklist at https://ena-docs.readthedocs.io/en/latest/submit/reads/webin-cli.html#metadata-validation. update definition see #96 MIXS core (mandatory) #96 Sequencing machine used. Where possible the term should be taken from the OBI list of DNA sequencers (http://purl.obolibrary.org/obo/OBI_0400103). Text or OBI {termLabel} {[termID]}|{text} 454 Genome Sequencer FLX [OBI:0000702] sequencing M M M M M M M M M M M 1 50 MIXS:0000050 MIGS-29 seq_quality_check sequence quality check no change made. stay in core Indicate if the sequence has been called by automatic systems (none) or undergone a manual editing procedure (e.g. by inspecting the raw data or chromatograms). Applied only for sequences that are not submitted to SRA,ENA or DRA none or manually edited [none|manually edited] none sequencing - - - - - - C C - - - 1 51 MIXS:0000051 chimera_check chimera check software Updated name and definition MIMARKS term "definition needs work - should include what is expected value for this item. Also Jasper suggested changing name to include suffix ""software"" like other terms e.g. assembly software." CIH #97 Tool(s) used for chimera checking, including version number and parameters, to discover and remove chimeric sequences. A chimeric sequence is comprised of two or more phylogenetically distinct parent sequences. name and version of software, parameters used {software};{version};{parameters} uchime;v4.1;default parameters sequencing - - - - - - C C - - - 1 52 MIXS:0000052 tax_ident taxonomic identity marker no change made. move to checklist - MISAG, MIMAG, review the definition, can this point to a controlled vocabulary clarify definition and move to checklist for MISAG and MIMAG The phylogenetic marker(s) used to assign an organism name to the SAG or MAG enumeration [16S rRNA gene|multi-marker approach|other] other: rpoB gene sequencing C C C C C - - - M M X 1 53 MIXS:0000053 diff --git a/tests/test_121/input/schema_def.tsv b/tests/test_121/input/schema_def.tsv index 0015f44..40b5b1f 100644 --- a/tests/test_121/input/schema_def.tsv +++ b/tests/test_121/input/schema_def.tsv @@ -1,5 +1,5 @@ id schema name aliases broad_mappings categories close_mappings comments conforms_to contributors created_by created_on date default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements imports in_language instantiates last_updated_on dat license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_description_source alt_description_text flavor annotation local name source local name value structured_aliases literal_form structured_aliases alias_predicate structured_aliases categories in subset id_prefixes emit_prefixes default_curi_maps >id schema aliases broad_mappings categories close_mappings comments conforms_to contributors created_by ignore default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements ignore in_language ignore ignore license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_descriptions alt_descriptions annotations local_names local_names structured_aliases structured_aliases structured_aliases in_subset id_prefixes emit_prefixes default_curi_maps > internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' -> inner_key: source inner_key: text inner_key: flavor inner_key: source inner_key: value inner_key: literal_form inner_key: alias_predicate inner_key: categories "" -http://example.com/some_schema some schema text1|text2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 the overall usefulness of default_curi_maps is debatable|there may be some improper modeling in here like illegal ranges. what software will detect that?|what about numeric, date and booleans that get converted to strings?|what are implements and instantiates good for?|how well does including an imports statement in a schemsheets TSV work?|todo what about multivalued slots with multiple inner keys?|what does the schema repair method do? LinkML some_schema:1|some_schema:2 some_schema:1 some_schema float we all feel deprecated some times some_schema:1 some_schema:1 A schema that tests as many elements as possible. For use in testing YAML <-> sheets some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 English some_schema:1|some_schema:2 MIT some_schema:1|some_schema:2 some_schema:1 some_schema:1|some_schema:2 text1|text2 3 some_schema:1|some_schema:2 some_schema:1|some_schema:2 TRUE some_schema:1 some_schema:1 See description SETTINGS! v0.0.1 wiktionary "An outline or image universally applicable to a general conception, under which it is likely to be presented to the mind" raspberry logic format schema_definition EXACT_SYNONYM some_schema:1|some_schema:2 main_subset|secret_subset data_prefix_1|data_prefix_2 data_prefix_1|data_prefix_2|non_data_prefix semweb_context|idot_context \ No newline at end of file +> inner_key: source inner_key: description inner_key: flavor inner_key: source inner_key: value inner_key: literal_form inner_key: predicate inner_key: categories "" +http://example.com/some_schema some_schema text1|text2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 the overall usefulness of default_curi_maps is debatable|there may be some improper modeling in here like illegal ranges. what software will detect that?|what about numeric, date and booleans that get converted to strings?|what are implements and instantiates good for?|how well does including an imports statement in a schemsheets TSV work?|todo what about multivalued slots with multiple inner keys?|what does the schema repair method do? LinkML some_schema:1|some_schema:2 some_schema:1 some_schema float we all feel deprecated some times some_schema:1 some_schema:1 A schema that tests as many elements as possible. For use in testing YAML <-> sheets some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 English some_schema:1|some_schema:2 MIT some_schema:1|some_schema:2 some_schema:1 some_schema:1|some_schema:2 text1|text2 3 some_schema:1|some_schema:2 some_schema:1|some_schema:2 TRUE some_schema:1 some_schema:1 See description SETTINGS! v0.0.1 wiktionary "An outline or image universally applicable to a general conception, under which it is likely to be presented to the mind" raspberry logic format schema_definition EXACT_SYNONYM some_schema:1|some_schema:2 main_subset|secret_subset data_prefix_1|data_prefix_2 data_prefix_1|data_prefix_2|non_data_prefix semweb_context|idot_context diff --git a/tests/test_121/test_mixs_generation.py b/tests/test_121/test_mixs_generation.py index 6cff4b8..e64ec81 100644 --- a/tests/test_121/test_mixs_generation.py +++ b/tests/test_121/test_mixs_generation.py @@ -1,13 +1,5 @@ -import csv -import logging import os -import pprint - -from linkml.generators.projectgen import ProjectGenerator, ProjectConfiguration from linkml_runtime.dumpers import yaml_dumper -from linkml_runtime.utils.schema_as_dict import schema_as_dict -from linkml_runtime.utils.schemaview import SchemaView -from linkml.utils.helpers import write_to_file from schemasheets.schemamaker import SchemaMaker, get_metamodel, SchemaSheetRowException