Skip to content

Commit

Permalink
handles schema Settings
Browse files Browse the repository at this point in the history
  • Loading branch information
turbomam committed Sep 5, 2023
1 parent b97ba63 commit f27b272
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 31 deletions.
54 changes: 36 additions & 18 deletions schemasheets/schemamaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
from linkml_runtime.dumpers import yaml_dumper
from linkml_runtime.linkml_model import Annotation, Example
from linkml_runtime.linkml_model.meta import SchemaDefinition, ClassDefinition, Prefix, \
SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element
SlotDefinition, EnumDefinition, PermissibleValue, SubsetDefinition, TypeDefinition, Element, Setting
from linkml_runtime.utils.schema_as_dict import schema_as_dict
from linkml_runtime.utils.schemaview import SchemaView, re

from schemasheets.schemasheet_datamodel import ColumnConfig, TableConfig, get_configmodel, get_metamodel, COL_NAME, \
DESCRIPTOR, \
tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet
tmap, T_CLASS, T_PV, T_SLOT, T_SUBSET, T_SCHEMA, T_ENUM, T_PREFIX, T_TYPE, SchemaSheet, T_SETTING
from schemasheets.conf.configschema import Cardinality
from schemasheets.utils.google_sheets import gsheets_download_url
from schemasheets.utils.prefixtool import guess_prefix_expansion
Expand Down Expand Up @@ -83,7 +83,6 @@ def _tidy_slot_usage(self):
c.slots.remove(sn)
del c.slot_usage[sn]


def merge_sheet(self, file_name: str, delimiter='\t') -> None:
"""
Merge information from the given schema sheet into the current schema
Expand All @@ -93,15 +92,15 @@ def merge_sheet(self, file_name: str, delimiter='\t') -> None:
:return:
"""
logging.info(f'READING {file_name} D={delimiter}')
#with self.ensure_file(file_name) as tsv_file:
# with self.ensure_file(file_name) as tsv_file:
# reader = csv.DictReader(tsv_file, delimiter=delimiter)
with self.ensure_csvreader(file_name, delimiter=delimiter) as reader:
schemasheet = SchemaSheet.from_dictreader(reader)
if self.table_config_path:
schemasheet.load_table_config(self.table_config_path)
line_num = schemasheet.start_line_number
# TODO: check why this doesn't work
#while rows and all(x for x in rows[-1] if not x):
# while rows and all(x for x in rows[-1] if not x):
# print(f'TRIMMING: {rows[-1]}')
# rows.pop()
logging.info(f'ROWS={len(schemasheet.rows)}')
Expand All @@ -118,21 +117,29 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
name = element.prefix_prefix
elif isinstance(element, PermissibleValue):
name = element.text
elif isinstance(element, Setting):
# print(f"\n{element = }")
name = element.setting_key
else:
logging.debug(f'EL={element} in {row}')
name = element.name
logging.debug(f'ADDING: {row} // {name}')
for k, v in row.items():
# print(f"\n{k = }")
if k not in table_config.columns:
raise ValueError(f'Expected to find {k} in {table_config.columns.keys()}')
cc = table_config.columns[k]
# print(f"{cc = }")
v = self.normalize_value(v, cc)
if v:
# print(f"{v = }")
# special case: class-context provided by settings
if cc.settings.applies_to_class:
actual_element = list(self.row_focal_element(row, table_config, column=k))[0]
else:
actual_element = element
# print(f"{cc.maps_to = }")
# print(f"{cc = }")
logging.debug(f'SETTING {name} {cc.maps_to} = {v}')
if cc.maps_to == 'cardinality':
self.set_cardinality(actual_element, v)
Expand Down Expand Up @@ -172,9 +179,13 @@ def add_row(self, row: Dict[str, Any], table_config: TableConfig):
curr_val = getattr(curr_obj, cc.settings.inner_key, None)
else:
curr_val = getattr(actual_element, cc.maps_to)
# print(f"{curr_val = }")
# print(f"{v = }")

if curr_val and curr_val != 'TEMP' and curr_val != v and \
not isinstance(actual_element, SchemaDefinition) and \
not isinstance(actual_element, Prefix):
not isinstance(actual_element, Prefix) and \
not isinstance(actual_element, Setting):
logging.warning(f'Overwriting value for {k}, was {curr_val}, now {v}')
raise ValueError(f'Cannot reset value for {k}, was {curr_val}, now {v}')
if cc.settings.inner_key:
Expand Down Expand Up @@ -288,17 +299,25 @@ def row_focal_element(self, row: Dict[str, Any], table_config: TableConfig,
pfx = Prefix(vs[0], 'TODO')
self.schema.prefixes[pfx.prefix_prefix] = pfx
vmap[k] = [pfx]
elif elt_cls == Setting:
if len(vs) != 1:
raise ValueError(f'Cardinality of setting col must be 1; got: {vs}')
stg = Setting(vs[0], 'TODO')
self.schema.settings[stg.setting_key] = stg
vmap[k] = [stg]
elif elt_cls == SchemaDefinition:
if len(vs) != 1:
raise ValueError(f'Cardinality of schema col must be 1; got: {vs}')
self.schema.name = vs[0]
vmap[k] = [self.schema]
else:
vmap[k] = [self.get_current_element(elt_cls(v)) for v in vs]

def check_excess(descriptors):
    """
    Verify that the row's value map holds no unexpected descriptor keys

    :param descriptors: list of descriptor keys permitted for this kind of row;
        the schema descriptor is always permitted in addition to these
    :raises ValueError: if the enclosing vmap has any key outside the permitted set
    """
    # compare against the permitted keys plus the always-allowed schema column
    permitted = set(descriptors + [T_SCHEMA])
    diff = set(vmap.keys()).difference(permitted)
    if diff:
        raise ValueError(f'Excess slots: {diff}')

if column:
cc = table_config.columns[column]
if cc.settings.applies_to_class:
Expand Down Expand Up @@ -351,7 +370,7 @@ def check_excess(descriptors):
this_enum: EnumDefinition = vmap[T_ENUM][0]
if T_PV in vmap:
for pv in vmap[T_PV]:
#pv = PermissibleValue(text=v)
# pv = PermissibleValue(text=v)
this_enum.permissible_values[pv.text] = pv
yield pv
else:
Expand All @@ -368,6 +387,9 @@ def check_excess(descriptors):
elif T_SCHEMA in vmap:
for main_elt in vmap[T_SCHEMA]:
yield main_elt
elif T_SETTING in vmap:
for main_elt in vmap[T_SETTING]:
yield main_elt
else:
raise ValueError(f'Could not find a focal element for {row}')

Expand Down Expand Up @@ -419,7 +441,8 @@ def normalize_value(self, v: str, column_config: ColumnConfig = None) -> Any:
v = None
if column_config.settings.curie_prefix:
if ':' in v:
logging.warning(f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed')
logging.warning(
f'Will not prefix {v} with {column_config.settings.curie_prefix} as it is already prefixed')
else:
v = f'{column_config.settings.curie_prefix}:{v}'
if column_config.settings.prefix:
Expand Down Expand Up @@ -537,8 +560,8 @@ def repair_schema(self, schema: SchemaDefinition) -> SchemaDefinition:
:return:
"""
sv = SchemaView(schema)
#pfx = schema.default_prefix
#if pfx not in schema.prefixes:
# pfx = schema.default_prefix
# if pfx not in schema.prefixes:
# schema.prefixes[pfx] = Prefix(pfx, f'http://example.org/{pfx}/')
# logging.info(f'Set default prefix: {schema.prefixes[pfx]}')
prefixes = set()
Expand Down Expand Up @@ -622,7 +645,8 @@ def ensure_csvreader(self, file_name: str, delimiter=None) -> str:
help="Google sheets ID. If this is specified then the arguments MUST be sheet names")
@click.option("-v", "--verbose", count=True)
@click.argument('tsv_files', nargs=-1)
def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool, unique_slots: bool, verbose: int, sort_keys: bool):
def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_path: str, use_attributes: bool,
unique_slots: bool, verbose: int, sort_keys: bool):
"""
Convert schemasheets to a LinkML schema
Expand Down Expand Up @@ -653,14 +677,8 @@ def convert(tsv_files, gsheet_id, output: TextIO, name, repair, table_config_pat
schema = sm.repair_schema(schema)
schema_dict = schema_as_dict(schema)
output.write(yaml.dump(schema_dict, sort_keys=sort_keys))
#output.write(yaml_dumper.dumps(schema))
# output.write(yaml_dumper.dumps(schema))


if __name__ == '__main__':
convert()






6 changes: 5 additions & 1 deletion schemasheets/schemasheet_datamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import yaml
from linkml_runtime.linkml_model import SlotDefinition, ClassDefinition, SchemaDefinition, \
PermissibleValue, EnumDefinition, TypeDefinition, SubsetDefinition, Prefix
from linkml_runtime.linkml_model.meta import Setting

from linkml_runtime.utils.schemaview import SchemaView

from schemasheets.conf.configschema import ColumnSettings, Shortcuts
Expand All @@ -26,6 +28,7 @@
T_TYPE = 'type'
T_SUBSET = 'subset'
T_PREFIX = 'prefix'
T_SETTING = 'setting'

tmap = {
T_SCHEMA: SchemaDefinition,
Expand All @@ -35,7 +38,8 @@
T_PV: PermissibleValue,
T_TYPE: TypeDefinition,
T_SUBSET: SubsetDefinition,
T_PREFIX: Prefix
T_PREFIX: Prefix,
T_SETTING: Setting,
}


Expand Down
3 changes: 3 additions & 0 deletions tests/test_121/input/setting_defs.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
setting name setting expansion
>setting setting_value
vowels [aeiouAEIOU]+
25 changes: 13 additions & 12 deletions tests/test_121/test_mixs_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,14 @@

# TODO: what about assertions into read-only slots?
# linkml2schemasheets-template --source-path "https://w3id.org/linkml/meta.yaml" --output-path meta.tsv --report-style exhaustive > meta_template_report.txt

# definition_uri
# from_schema
# generation_date
# imported_from
# metamodel_version
# source_file
# source_file_date
# source_file_size
# definition_uri
# from_schema
# generation_date
# imported_from
# metamodel_version
# source_file
# source_file_date
# source_file_size

SCHEMA_NAME = 'mixs_test'

Expand All @@ -31,8 +30,9 @@

# PROBLEM_DIR = os.path.join(INPUT_DIR, 'problem_cases')

SCHEMA_DEF_TSV = os.path.join(INPUT_DIR, 'schema_def.tsv')
PREFIX_DEFS_TSV = os.path.join(INPUT_DIR, 'prefix_defs.tsv')
SCHEMA_DEF_TSV = os.path.join(INPUT_DIR, 'schema_def.tsv')
SETTING_DEFS_TSV = os.path.join(INPUT_DIR, 'setting_defs.tsv')
SUBSET_DEFS_TSV = os.path.join(INPUT_DIR, 'subset_defs.tsv')

SCHEMA_YAML = os.path.join(OUTPUT_DIR, f"{SCHEMA_NAME}.yaml")
Expand All @@ -41,14 +41,15 @@
def test_mixs_generation():
sm = SchemaMaker(use_attributes=False,
unique_slots=True,
# default_name=SCHEMA_NAME,
)
schema = sm.create_schema([
SCHEMA_DEF_TSV,
PREFIX_DEFS_TSV,
SCHEMA_DEF_TSV,
SETTING_DEFS_TSV,
SUBSET_DEFS_TSV,
])
schema = sm.repair_schema(schema)
print("\n")
print(yaml_dumper.dumps(schema))

# yaml_str = yaml_dumper.dumps(schemaview.schema)
Expand Down

0 comments on commit f27b272

Please sign in to comment.