Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

templated all SchemaDefinition slots including settings #122

Merged
merged 3 commits into from
Feb 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
.DS_Store
target

examples/output

Expand Down
54 changes: 36 additions & 18 deletions schemasheets/schemamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.linkml_model import Annotation, Example
from linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition, Prefix, \
SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element
SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element, Setting
from linkml_runtime.utils.schema_as_dict import schema_as_dict
from linkml_runtime.utils.schemaview import SchemaView, re

from schemasheets.schemasheet_datamodel import ColumnConfig, TableConfig, get_configmodel, get_metamodel, COL_NAME, \
DESCRIPTOR, \
tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet
tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet, T_SETTING
from schemasheets.conf.configschema import Cardinality
from schemasheets.utils.google_sheets import gsheets_download_url
from schemasheets.utils.prefixtool import guess_prefix_expansion
Expand Down Expand Up @@ -83,7 +83,6 @@ def _tidy_slot_usage(self):
c.slots.remove(sn)
del c.slot_usage[sn]


def merge_sheet(self, file_name: str, delimiter='\t') -> None:
"""
Merge information from the given schema sheet into the current schema
Expand All @@ -93,15 +92,15 @@ def merge_sheet(self, file_name: str, delimiter='\t') -> None:
:return:
"""
logging.info(f'READING {file_name} D={delimiter}')
#with self.ensure_file(file_name) as tsv_file:
# with self.ensure_file(file_name) as tsv_file:
# reader = csv.DictReader(tsv_file, delimiter=delimiter)
with self.ensure_csvreader(file_name, delimiter=delimiter) as reader:
schemasheet = SchemaSheet.from_dictreader(reader)
if self.table_config_path:
schemasheet.load_table_config(self.table_config_path)
line_num = schemasheet.start_line_number
# TODO: check why this doesn't work
#while rows and all(x for x in rows[-1] if not x):
# while rows and all(x for x in rows[-1] if not x):
# print(f'TRIMMING: {rows[-1]}')
# rows.pop()
logging.info(f'ROWS={len(schemasheet.rows)}')
Expand All @@ -118,21 +117,29 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
name = element.prefix_prefix
elif isinstance(element, PermissibleValue):
name = element.text
elif isinstance(element, Setting):
# print(f"\n{element = }")
name = element.setting_key
else:
logging.debug(f'EL={element} in {row}')
name = element.name
logging.debug(f'ADDING: {row} // {name}')
for k, v in row.items():
# print(f"\n{k = }")
if k not in table_config.columns:
raise ValueError(f'Expected to find {k} in {table_config.columns.keys()}')
cc = table_config.columns[k]
# print(f"{cc = }")
v = self.normalize_value(v, cc)
if v:
# print(f"{v = }")
# special case: class-context provided by settings
if cc.settings.applies_to_class:
actual_element = list(self.row_focal_element(row, table_config, column=k))[0]
else:
actual_element = element
# print(f"{cc.maps_to = }")
# print(f"{cc = }")
logging.debug(f'SETTING {name} {cc.maps_to} = {v}')
if cc.maps_to == 'cardinality':
self.set_cardinality(actual_element, v)
Expand Down Expand Up @@ -172,9 +179,13 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
curr_val = getattr(curr_obj, cc.settings.inner_key, None)
else:
curr_val = getattr(actual_element, cc.maps_to)
# print(f"{curr_val = }")
# print(f"{v = }")

if curr_val and curr_val != 'TEMP' and curr_val != v and \
not isinstance(actual_element, SchemaDefinition) and \
not isinstance(actual_element, Prefix):
not isinstance(actual_element, Prefix) and \
not isinstance(actual_element, Setting):
logging.warning(f'Overwriting value for {k}, was {curr_val}, now {v}')
raise ValueError(f'Cannot reset value for {k}, was {curr_val}, now {v}')
if cc.settings.inner_key:
Expand Down Expand Up @@ -288,17 +299,25 @@ def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig,
pfx = Prefix(vs[0], 'TODO')
self.schema.prefixes[pfx.prefix_prefix] = pfx
vmap[k] = [pfx]
elif elt_cls == Setting:
if len(vs) != 1:
raise ValueError(f'Cardinality of setting col must be 1; got: {vs}')
stg = Setting(vs[0], 'TODO')
self.schema.settings[stg.setting_key] = stg
vmap[k] = [stg]
elif elt_cls == SchemaDefinition:
if len(vs) != 1:
raise ValueError(f'Cardinality of schema col must be 1; got: {vs}')
self.schema.name = vs[0]
vmap[k] = [self.schema]
else:
vmap[k] = [self.get_current_element(elt_cls(v)) for v in vs]

def check_excess(descriptors):
    """
    Reject rows that carry focal-element columns beyond the permitted ones.

    The keys of the enclosing ``vmap`` are compared against ``descriptors``
    plus the schema descriptor (``T_SCHEMA``), which is always tolerated.

    :param descriptors: list of descriptor keys allowed for this kind of row
    :raises ValueError: if ``vmap`` has any key outside the permitted set
    """
    permitted = set(descriptors + [T_SCHEMA])
    excess = set(vmap.keys()) - permitted
    if excess:
        raise ValueError(f'Excess slots: {excess}')

if column:
cc = table_config.columns[column]
if cc.settings.applies_to_class:
Expand Down Expand Up @@ -351,7 +370,7 @@ def check_excess(descriptors):
this_enum: EnumDefinition = vmap[T_ENUM][0]
if T_PV in vmap:
for pv in vmap[T_PV]:
#pv = PermissibleValue(text=v)
# pv = PermissibleValue(text=v)
this_enum.permissible_values[pv.text] = pv
yield pv
else:
Expand All @@ -368,6 +387,9 @@ def check_excess(descriptors):
elif T_SCHEMA in vmap:
for main_elt in vmap[T_SCHEMA]:
yield main_elt
elif T_SETTING in vmap:
for main_elt in vmap[T_SETTING]:
yield main_elt
else:
raise ValueError(f'Could not find a focal element for {row}')

Expand Down Expand Up @@ -419,7 +441,8 @@ def normalize_value(self, v: str, column_config: ColumnConfig = None) -> Any:
v = None
if column_config.settings.curie_prefix:
if ':' in v:
logging.warning(f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed')
logging.warning(
f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed')
else:
v = f'{column_config.settings.curie_prefix}:{v}'
if column_config.settings.prefix:
Expand Down Expand Up @@ -537,8 +560,8 @@ def repair_schema(self, schema: SchemaDefinition) -> SchemaDefinition:
:return:
"""
sv = SchemaView(schema)
#pfx = schema.default_prefix
#if pfx not in schema.prefixes:
# pfx = schema.default_prefix
# if pfx not in schema.prefixes:
# schema.prefixes[pfx] = Prefix(pfx, f'http://example.org/{pfx}/')
# logging.info(f'Set default prefix: {schema.prefixes[pfx]}')
prefixes = set()
Expand Down Expand Up @@ -622,7 +645,8 @@ def ensure_csvreader(self, file_name: str, delimiter=None) -> str:
help="Google sheets ID. If this is specified then the arguments MUST be sheet names")
@click.option("-v", "--verbose", count=True)
@click.argument('tsv_files', nargs=-1)
def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool, unique_slots: bool, verbose: int, sort_keys: bool):
def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool,
unique_slots: bool, verbose: int, sort_keys: bool):
"""
Convert schemasheets to a LinkML schema

Expand Down Expand Up @@ -653,14 +677,8 @@ def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_pat
schema = sm.repair_schema(schema)
schema_dict = schema_as_dict(schema)
output.write(yaml.dump(schema_dict, sort_keys=sort_keys))
#output.write(yaml_dumper.dumps(schema))
# output.write(yaml_dumper.dumps(schema))


if __name__ == '__main__':
convert()






6 changes: 5 additions & 1 deletion schemasheets/schemasheet_datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import yaml
from linkml_runtime.linkml_model import SlotDefinition, ClassDefinition, SchemaDefinition, \
PermissibleValue, EnumDefinition, TypeDefinition, SubsetDefinition, Prefix
from linkml_runtime.linkml_model.meta import Setting

from linkml_runtime.utils.schemaview import SchemaView

from schemasheets.conf.configschema import ColumnSettings, Shortcuts
Expand All @@ -26,6 +28,7 @@
T_TYPE = 'type'
T_SUBSET = 'subset'
T_PREFIX = 'prefix'
T_SETTING = 'setting'

tmap = {
T_SCHEMA: SchemaDefinition,
Expand All @@ -35,7 +38,8 @@
T_PV: PermissibleValue,
T_TYPE: TypeDefinition,
T_SUBSET: SubsetDefinition,
T_PREFIX: Prefix
T_PREFIX: Prefix,
T_SETTING: Setting,
}


Expand Down
5 changes: 5 additions & 0 deletions tests/test_121/input/class_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class description
>class description
Vehicle A machine, with or without its own power source, that eases the transportation of people, materials, etc.
Airplane "A vehicle which flies through the air, obtaining lift from air flowing across fixed wings"
Boat A vehicle which moves through water
6 changes: 6 additions & 0 deletions tests/test_121/input/prefix_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
prefix URI
> prefix prefix_reference
some_schema http://example.com/some_schema_path/
data_prefix_1 http://example.com/data_prefix_1/
data_prefix_2 http://example.com/data_prefix_2/
non_data_prefix http://example.com/non_data_prefix/
5 changes: 5 additions & 0 deletions tests/test_121/input/schema_def.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id schema name aliases broad_mappings categories close_mappings comments conforms_to contributors created_by created_on date default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements imports in_language instantiates last_updated_on dat license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_description_source alt_description_text flavor annotation local name source local name value structured_aliases literal_form structured_aliases alias_predicate structured_aliases categories in subset id_prefixes emit_prefixes default_curi_maps
>id schema aliases broad_mappings categories close_mappings comments conforms_to contributors created_by ignore default_prefix default_range deprecated deprecated_element_has_exact_replacement deprecated_element_has_possible_replacement description exact_mappings implements ignore in_language ignore ignore license mappings modified_by narrow_mappings notes rank related_mappings see_also slot_names_unique source status title todos version alt_descriptions alt_descriptions annotations local_names local_names structured_aliases structured_aliases structured_aliases in_subset id_prefixes emit_prefixes default_curi_maps
> internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|' internal_separator: '|'
> inner_key: source inner_key: text inner_key: flavor inner_key: source inner_key: value inner_key: literal_form inner_key: alias_predicate inner_key: categories ""
http://example.com/some_schema some schema text1|text2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 the overall usefulness of default_curi_maps is debatable|there may be some improper modeling in here like illegal ranges. what software will detect that?|what about numeric, date and booleans that get converted to strings?|what are implements and instantiates good for?|how well does including an imports statement in a schemsheets TSV work?|todo what about multivalued slots with multiple inner keys?|what does the schema repair method do? LinkML some_schema:1|some_schema:2 some_schema:1 some_schema float we all feel deprecated some times some_schema:1 some_schema:1 A schema that tests as many elements as possible. For use in testing YAML <-> sheets some_schema:1|some_schema:2 some_schema:1|some_schema:2 some_schema:1|some_schema:2 English some_schema:1|some_schema:2 MIT some_schema:1|some_schema:2 some_schema:1 some_schema:1|some_schema:2 text1|text2 3 some_schema:1|some_schema:2 some_schema:1|some_schema:2 TRUE some_schema:1 some_schema:1 See description SETTINGS! v0.0.1 wiktionary "An outline or image universally applicable to a general conception, under which it is likely to be presented to the mind" raspberry logic format schema_definition EXACT_SYNONYM some_schema:1|some_schema:2 main_subset|secret_subset data_prefix_1|data_prefix_2 data_prefix_1|data_prefix_2|non_data_prefix semweb_context|idot_context
3 changes: 3 additions & 0 deletions tests/test_121/input/setting_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
setting name setting expansion
>setting setting_value
vowels [aeiouAEIOU]+
3 changes: 3 additions & 0 deletions tests/test_121/input/setting_defs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
setting name setting expansion
>setting setting_value
vowels [aeiouAEIOU]+
4 changes: 4 additions & 0 deletions tests/test_121/input/slot_class_assignments.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class slot
>class slot
Vehicle exterior_color
Vehicle max_passengers
4 changes: 4 additions & 0 deletions tests/test_121/input/slot_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
slot description
>slot description
exterior_color the primary color, using crayola names, of the exterior of the vehicle
max_passengers The maximum number of human passengers that can be safely transported by the vehicle
4 changes: 4 additions & 0 deletions tests/test_121/input/subset_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Subset desc
> subset description
main_subset main subset
supplementary_subset supplementary subset
79 changes: 79 additions & 0 deletions tests/test_121/output/mixs_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
name: some schema
conforms_to: LinkML
implements:
- some_schema:1
- some_schema:2
description: A schema that tests as many elements as possible. For use in testing
YAML <-> sheets
alt_descriptions:
text: An outline or image universally applicable to a general conception, under
which it is likely to be presented to the mind
title: See description
deprecated: we all feel deprecated some times
todos:
- text1
- text2
notes:
- text1
- text2
comments:
- text1
- text2
source: some_schema:1
in_language: English
see_also:
- some_schema:1
- some_schema:2
deprecated_element_has_exact_replacement: some_schema:1
deprecated_element_has_possible_replacement: some_schema:1
aliases:
- text1
- text2
mappings:
- some_schema:1
- some_schema:2
exact_mappings:
- some_schema:1
- some_schema:2
close_mappings:
- some_schema:1
- some_schema:2
related_mappings:
- some_schema:1
- some_schema:2
narrow_mappings:
- some_schema:1
- some_schema:2
broad_mappings:
- some_schema:1
- some_schema:2
created_by: some_schema:1
created_on: '2023-01-01'
last_updated_on: '2023-01-01'
modified_by: some_schema:1
status: some_schema:1
rank: '3'
id: http://example.com/some_schema
version: v0.0.1
imports:
- linkml:types
license: MIT
prefixes:
some_schema:
prefix_prefix: some_schema
prefix_reference: http://example.com/some_schema_path/
linkml:
prefix_prefix: linkml
prefix_reference: https://w3id.org/linkml/
default_prefix: some_schema
default_range: float
slot_names_unique: true
categories:
- some_schema:1
- some_schema:2
keywords:
- text1
- text2
contributors:
- some_schema:1
- some_schema:2
Loading
Loading