Skip to content

Commit

Permalink
Add initial version of micronutrient extraction template (and other n…
Browse files Browse the repository at this point in the history
…ew templates) (#469)
  • Loading branch information
caufieldjh authored Nov 4, 2024
2 parents a5238b2 + 0006a7b commit 16b40e2
Show file tree
Hide file tree
Showing 10 changed files with 2,171 additions and 0 deletions.
316 changes: 316 additions & 0 deletions src/ontogpt/templates/ecosim_methods.py

Large diffs are not rendered by default.

99 changes: 99 additions & 0 deletions src/ontogpt/templates/ecosim_methods.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# LinkML schema header for the EcoSIM methods extraction template.
id: http://w3id.org/ontogpt/ecosim_methods
name: ecosim_methods
title: EcoSIM Methods Extraction Template
description: >-
  EcoSIM Methods Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  # Declared so that default_prefix below refers to a prefix that exists in
  # this map; previously only ecosim_simple was declared here.
  # NOTE(review): trailing slash chosen so expanded element URIs stay
  # well-formed - confirm against the project's URI conventions.
  ecosim_methods: http://w3id.org/ontogpt/ecosim_methods/
  ecosim_simple: http://w3id.org/ontogpt/ecosim_simple
  ecosim: http://purl.obolibrary.org/obo/ecosim

# Must match one of the keys under "prefixes" above.
default_prefix: ecosim_methods
# Slots without an explicit range are treated as strings.
default_range: string

imports:
  - linkml:types
  - core

classes:
  # Root class of the template (tree_root): groups the locations, methods,
  # variables, equipment, and equipment-to-variable relationships to extract.
  TermSet:
    tree_root: true
    is_a: NamedEntity
    attributes:
      locations:
        range: Location
        multivalued: true
        description: >-
          A semicolon-separated list of research locations.
          Examples include: Vermont, New York City,
          Ethiopia
      methods:
        range: Method
        multivalued: true
        description: >-
          A semicolon-separated list of methods used in
          environmental and earth science research. Examples
          include: sampling, spectroscopy
      variables:
        range: Variable
        # The description asks for a list, so the slot must be multivalued
        # like the sibling "locations" and "methods" slots.
        multivalued: true
        description: >-
          A semicolon-separated list of variables measured in
          environmental and earth science research. Examples
          include: root shape, biomass, water turbidity
      equipments:
        range: Equipment
        # The description asks for a list, so the slot must be multivalued
        # like the sibling "locations" and "methods" slots.
        multivalued: true
        description: >-
          A semicolon-separated list of equipment used in
          environmental and earth science research.
      equipment_to_variable_relationships:
        range: EquipmentMeasuresVariable
        description: >-
          A semicolon separated list of relationships
          between specific equipment and variables
          they are used to measure as described in the input.
          Example: NMR spectrometer was used to measure
          chemical content
        multivalued: true
        inlined: true

  # Named entity for a research location; no annotator is configured.
  Location:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a location used in research.
  # Named entity for a research method, annotated with bioportal:ECOSIM.
  Method:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a method used in environment and
        earth science research.
  # Named entity for a measured variable, annotated with bioportal:ECOSIM.
  Variable:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: >-
        The name of a variable measured in environment and
        earth science research.
  # Named entity for a piece of equipment; no annotator is configured.
  Equipment:
    is_a: NamedEntity
    annotations:
      prompt: >-
        The name of a piece of equipment used in
        environment and earth science research.
  # Compound expression pairing one Equipment with the Variable it measures.
  EquipmentMeasuresVariable:
    is_a: CompoundExpression
    attributes:
      equipment:
        range: Equipment
        description: Name of the equipment used to measure a variable.
      variable:
        range: Variable
        description: Name of the variable being measured.

243 changes: 243 additions & 0 deletions src/ontogpt/templates/ecosim_simple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
from __future__ import annotations
from datetime import (
datetime,
date,
time
)
from decimal import Decimal
from enum import Enum
import re
import sys
from typing import (
Any,
ClassVar,
List,
Literal,
Dict,
Optional,
Union
)
from pydantic import (
BaseModel,
ConfigDict,
Field,
RootModel,
field_validator
)
# Both version markers hold the literal string "None" (not the None singleton).
metamodel_version = version = "None"


class ConfiguredBaseModel(BaseModel):
    # Common base for the model classes in this module that subclass it;
    # it carries only the shared pydantic configuration and declares no fields.
    model_config = ConfigDict(
        validate_assignment=True,
        validate_default=True,
        extra="forbid",
        arbitrary_types_allowed=True,
        use_enum_values=True,
        strict=False,
    )




class LinkMLMeta(RootModel):
    # Root model wrapping a plain dict of schema metadata. Subscript access,
    # item assignment, membership tests and fallback attribute lookups are
    # all forwarded to the wrapped dict held in ``root``.
    root: Dict[str, Any] = {}
    model_config = ConfigDict(frozen=True)

    def __getattr__(self, key: str):
        # Fallback attribute lookup: delegate to the wrapped dict object.
        wrapped = self.root
        return getattr(wrapped, key)

    def __getitem__(self, key: str):
        # meta["name"] reads the entry from the wrapped dict.
        wrapped = self.root
        return wrapped[key]

    def __setitem__(self, key: str, value):
        # NOTE(review): this mutates the wrapped dict in place; frozen=True
        # presumably only blocks attribute reassignment - confirm.
        wrapped = self.root
        wrapped[key] = value

    def __contains__(self, key: str) -> bool:
        # "name" in meta tests the keys of the wrapped dict.
        wrapped = self.root
        return key in wrapped


# Schema-level metadata for the ecosim_simple template. Each entry of the
# prefix table maps a prefix name to a dict repeating that name under
# "prefix_prefix" next to its URI under "prefix_reference".
linkml_meta = LinkMLMeta(
    {
        "default_prefix": "ecosim_simple",
        "default_range": "string",
        "description": "Simple EcoSIM Extraction Template",
        "id": "http://w3id.org/ontogpt/ecosim_simple",
        "imports": ["linkml:types", "core"],
        "license": "https://creativecommons.org/publicdomain/zero/1.0/",
        "name": "ecosim_simple",
        "prefixes": {
            prefix: {"prefix_prefix": prefix, "prefix_reference": reference}
            for prefix, reference in (
                ("ecosim", "http://purl.obolibrary.org/obo/ecosim"),
                ("ecosim_simple", "http://w3id.org/ontogpt/ecosim_simple"),
                ("linkml", "https://w3id.org/linkml/"),
                ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
            )
        },
        "source_file": "/home/harry/ontogpt/src/ontogpt/templates/ecosim_simple.yaml",
        "title": "Simple EcoSIM Extraction Template",
    }
)

class NullDataOptions(str, Enum):
    # String-valued enumeration; every member's value equals its own name.

    UNSPECIFIED_METHOD_OF_ADMINISTRATION = "UNSPECIFIED_METHOD_OF_ADMINISTRATION"

    NOT_APPLICABLE = "NOT_APPLICABLE"

    NOT_MENTIONED = "NOT_MENTIONED"



class ExtractionResult(ConfiguredBaseModel):
    """
    A result of extracting knowledge on text
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # All fields are optional and default to None.
    input_id: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_id", "domain_of": ["ExtractionResult"]}
        },
    )
    input_title: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_title", "domain_of": ["ExtractionResult"]}
        },
    )
    input_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "input_text", "domain_of": ["ExtractionResult"]}
        },
    )
    raw_completion_output: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "raw_completion_output",
                "domain_of": ["ExtractionResult"],
            }
        },
    )
    prompt: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "prompt", "domain_of": ["ExtractionResult"]}
        },
    )
    # Untyped payloads: any object, and a list of any objects, respectively.
    extracted_object: Optional[Any] = Field(
        default=None,
        description="The complex objects extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                "alias": "extracted_object",
                "domain_of": ["ExtractionResult"],
            }
        },
    )
    named_entities: Optional[List[Any]] = Field(
        default=None,
        description="Named entities extracted from the text",
        json_schema_extra={
            "linkml_meta": {
                "alias": "named_entities",
                "domain_of": ["ExtractionResult"],
            }
        },
    )


class NamedEntity(ConfiguredBaseModel):
    # Entity with a required identifier and an optional label; flagged as
    # abstract in its schema metadata.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Required. The metadata marks it prompt.skip and notes that it is
    # populated during the grounding and normalization step.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    # Optional; the metadata maps it to rdfs:label with the alias "name".
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )


class CompoundExpression(ConfiguredBaseModel):
    # Field-less base class, flagged as abstract in its schema metadata.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )


class Triple(CompoundExpression):
    """
    Abstract parent for Relation Extraction tasks
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"abstract": True, "from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Core subject / predicate / object slots, all optional strings.
    subject: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "subject", "domain_of": ["Triple"]}
        },
    )
    predicate: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "predicate", "domain_of": ["Triple"]}
        },
    )
    object: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "object", "domain_of": ["Triple"]}
        },
    )
    # Optional qualifiers for the statement, its subject and its object.
    qualifier: Optional[str] = Field(
        default=None,
        description='A qualifier for the statements, e.g. "NOT" for negation',
        json_schema_extra={
            "linkml_meta": {"alias": "qualifier", "domain_of": ["Triple"]}
        },
    )
    subject_qualifier: Optional[str] = Field(
        default=None,
        description='An optional qualifier or modifier for the subject of the statement, e.g. "high dose" or "intravenously administered"',
        json_schema_extra={
            "linkml_meta": {"alias": "subject_qualifier", "domain_of": ["Triple"]}
        },
    )
    object_qualifier: Optional[str] = Field(
        default=None,
        description='An optional qualifier or modifier for the object of the statement, e.g. "severe" or "with additional complications"',
        json_schema_extra={
            "linkml_meta": {"alias": "object_qualifier", "domain_of": ["Triple"]}
        },
    )


class TextWithTriples(ConfiguredBaseModel):
    """
    A text containing one or more relations of the Triple type.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Publication is defined later in this module, so this annotation is a
    # forward reference. The metadata marks the slot prompt.skip.
    publication: Optional[Publication] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "publication",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "domain_of": ["TextWithTriples", "TextWithEntity"],
            }
        },
    )
    triples: Optional[List[Triple]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "triples", "domain_of": ["TextWithTriples"]}
        },
    )


class TextWithEntity(ConfiguredBaseModel):
    """
    A text containing one or more instances of a single type of entity.
    """

    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    # Publication is defined later in this module, so this annotation is a
    # forward reference. The metadata marks the slot prompt.skip.
    publication: Optional[Publication] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {
                "alias": "publication",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "domain_of": ["TextWithTriples", "TextWithEntity"],
            }
        },
    )
    # Entities are held as plain strings.
    entities: Optional[List[str]] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "entities", "domain_of": ["TextWithEntity"]}
        },
    )


class RelationshipType(NamedEntity):
    # Named entity whose metadata lists the id prefixes RO and biolink.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "from_schema": "http://w3id.org/ontogpt/core",
            "id_prefixes": ["RO", "biolink"],
        }
    )

    # id and label are re-declared here with the same definitions they have
    # on the NamedEntity parent class.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )


class Publication(ConfiguredBaseModel):
    # Publication record; every field is an optional string defaulting to None.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    id: Optional[str] = Field(
        default=None,
        description="The publication identifier",
        json_schema_extra={
            "linkml_meta": {"alias": "id", "domain_of": ["NamedEntity", "Publication"]}
        },
    )
    title: Optional[str] = Field(
        default=None,
        description="The title of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "title", "domain_of": ["Publication"]}
        },
    )
    abstract: Optional[str] = Field(
        default=None,
        description="The abstract of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "abstract", "domain_of": ["Publication"]}
        },
    )
    # The only field here declared without a description.
    combined_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "combined_text", "domain_of": ["Publication"]}
        },
    )
    full_text: Optional[str] = Field(
        default=None,
        description="The full text of the publication",
        json_schema_extra={
            "linkml_meta": {"alias": "full_text", "domain_of": ["Publication"]}
        },
    )


class AnnotatorResult(ConfiguredBaseModel):
    # Three optional string fields: a subject text plus an object id and text.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/core"}
    )

    subject_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "subject_text", "domain_of": ["AnnotatorResult"]}
        },
    )
    object_id: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "object_id", "domain_of": ["AnnotatorResult"]}
        },
    )
    object_text: Optional[str] = Field(
        default=None,
        json_schema_extra={
            "linkml_meta": {"alias": "object_text", "domain_of": ["AnnotatorResult"]}
        },
    )


class TermSet(NamedEntity):
    # Marked as the tree root of the ecosim_simple schema in its metadata.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {"from_schema": "http://w3id.org/ontogpt/ecosim_simple", "tree_root": True}
    )

    # Extracted terms, held as a list of plain strings.
    terms: Optional[List[str]] = Field(
        default=None,
        description='A semicolon-separated list of variables for earth system simulation. Do not include abbreviations in parentheses, e.g., "Carbon (C)" should be represented as "carbon". Examples include: carboxylation, sodium, underground irrigation.',
        json_schema_extra={
            "linkml_meta": {"alias": "terms", "domain_of": ["TermSet"]}
        },
    )
    # id and label are re-declared here with the same definitions they have
    # on the NamedEntity parent class.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )


class Term(NamedEntity):
    # Named entity whose metadata carries the bioportal:ECOSIM annotator
    # and the prompt text used for this class.
    linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta(
        {
            "annotations": {
                "annotators": {"tag": "annotators", "value": "bioportal:ECOSIM"},
                "prompt": {
                    "tag": "prompt",
                    "value": "The name of a variable for earth system simulation.",
                },
            },
            "from_schema": "http://w3id.org/ontogpt/ecosim_simple",
        }
    )

    # id and label are re-declared here with the same definitions they have
    # on the NamedEntity parent class.
    id: str = Field(
        ...,
        description="A unique identifier for the named entity",
        json_schema_extra={
            "linkml_meta": {
                "alias": "id",
                "annotations": {
                    "prompt.skip": {"tag": "prompt.skip", "value": "true"}
                },
                "comments": [
                    "this is populated during the grounding and normalization step"
                ],
                "domain_of": ["NamedEntity", "Publication"],
            }
        },
    )
    label: Optional[str] = Field(
        default=None,
        description="The label (name) of the named thing",
        json_schema_extra={
            "linkml_meta": {
                "alias": "label",
                "aliases": ["name"],
                "annotations": {
                    "owl": {
                        "tag": "owl",
                        "value": "AnnotationProperty, AnnotationAssertion",
                    }
                },
                "domain_of": ["NamedEntity"],
                "slot_uri": "rdfs:label",
            }
        },
    )


# Model rebuild
# see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model
# Rebuild every model in definition order now that all classes exist.
for _model in (
    ExtractionResult,
    NamedEntity,
    CompoundExpression,
    Triple,
    TextWithTriples,
    TextWithEntity,
    RelationshipType,
    Publication,
    AnnotatorResult,
    TermSet,
    Term,
):
    _model.model_rebuild()
# Drop the loop variable so no extra name is left in the module namespace.
del _model
39 changes: 39 additions & 0 deletions src/ontogpt/templates/ecosim_simple.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# LinkML schema header for the simple EcoSIM extraction template.
id: http://w3id.org/ontogpt/ecosim_simple
name: ecosim_simple
title: Simple EcoSIM Extraction Template
description: Simple EcoSIM Extraction Template
license: https://creativecommons.org/publicdomain/zero/1.0/
prefixes:
  rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
  linkml: https://w3id.org/linkml/
  ecosim_simple: http://w3id.org/ontogpt/ecosim_simple
  ecosim: http://purl.obolibrary.org/obo/ecosim

# Matches the ecosim_simple entry declared under "prefixes" above.
default_prefix: ecosim_simple
# Slots without an explicit range are treated as strings.
default_range: string

imports:
  - linkml:types
  - core

classes:
  # Root class of the template (tree_root): holds the list of extracted terms.
  TermSet:
    tree_root: true
    is_a: NamedEntity
    attributes:
      terms:
        range: Term
        multivalued: true
        description: >-
          A semicolon-separated list of variables for earth system
          simulation. Do not include abbreviations in parentheses,
          e.g., "Carbon (C)" should be represented as "carbon".
          Examples include: carboxylation, sodium, underground
          irrigation.
  # Named entity for a single term, annotated with bioportal:ECOSIM.
  Term:
    is_a: NamedEntity
    annotations:
      annotators: bioportal:ECOSIM
      prompt: The name of a variable for earth system simulation.
Loading

0 comments on commit 16b40e2

Please sign in to comment.