From 8be49995efcd5c3eabbf930c448d073c83ebce67 Mon Sep 17 00:00:00 2001 From: Ian Cornelius Date: Mon, 27 May 2024 22:30:21 -0500 Subject: [PATCH] create a schema for validation of new record items --- schemas/records.json | 144 +++++++++++++++++++++++++++++++++++ scripts/transform-Records.py | 27 +++---- 2 files changed, 158 insertions(+), 13 deletions(-) create mode 100644 schemas/records.json diff --git a/schemas/records.json b/schemas/records.json new file mode 100644 index 0000000..1128942 --- /dev/null +++ b/schemas/records.json @@ -0,0 +1,144 @@ +{ + "type": "object", + "properties": { + "DIMEV": {"type": "string"}, + "IMEV": {"type": "string"}, + "NIMEV": {"type": "string"}, + "itemIncipit": {"type": "string"}, + "description": {"type": "string"}, + "descNote": {"type": "string"}, + "authors": { + "type": "array", + "items": { + "type": "object", + "properties": { + "lastName": {"type": "string"}, + "firstName": {"type": "string"}, + "suffix": {"type": "string"}, + "key": {"type": "string"} + }, + "required": ["lastName", "firstName", "suffix", "key"] + } + }, + "itemTitles": { + "type": "array", + "items": {"type": "string"} + }, + "subjects": { + "type": "array", + "items": {"type": "string"} + }, + "verseForms": { + "type": "array", + "items": {"type": "string"} + }, + "versePatterns": { + "type": "array", + "items": {"type": "string"} + }, + "languages": { + "type": "array", + "items": {"type": "string"} + }, + "ghosts": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": {"type": "string"}, + "note": {"type": "string"} + }, + "required": ["key", "note"] + } + }, + "witnesses": { + "type": "array", + "items": { + "type": "object", + "properties": { + "wit_id": {"type": "integer"}, + "illust": {"type": "string"}, + "music": {"type": "string"}, + "allLines": {"type": "string"}, + "firstLines": {"type": "string"}, + "lastLines": {"type": "string"}, + "sourceKey": {"type": "string"}, + "point_locators": { + "type": "object", + "properties": { + "prefix": {"type": "string"}, + "range": { + "type": "array", + "items": { + "type": "object", + "properties": { + "start": {"type": "string"}, + "end": {"type": "string"} + }, + "required": ["start", "end"] + } + } + }, + "required": ["prefix", "range"] + }, + "note": {"type": "string"}, + "MSAuthor": {"type": "string"}, + "MSTitle": {"type": "string"}, + "facsimiles": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": {"type": "string"}, + "point_locators": {"type": "string"} + }, + "required": ["key", "point_locators"] + } + }, + "editions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": {"type": "string"}, + "point_locators": {"type": "string"} + }, + "required": ["key", "point_locators"] + } + } + }, + "required": [ + "wit_id", + "illust", + "music", + "allLines", + "firstLines", + "lastLines", + "sourceKey", + "point_locators", + "note", + "MSAuthor", + "MSTitle", + "facsimiles", + "editions" + ] + } + } + }, + "required": [ + "DIMEV", + "IMEV", + "NIMEV", + "itemIncipit", + "description", + "descNote", + "authors", + "itemTitles", + "subjects", + "verseForms", + "versePatterns", + "languages", + "ghosts", + "witnesses" + ] +} diff --git a/scripts/transform-Records.py b/scripts/transform-Records.py index 07e0211..bbe3692 100644 --- a/scripts/transform-Records.py +++ b/scripts/transform-Records.py @@ -1,17 +1,21 @@ # This script extracts a small sample of item records from Records.xml and # transforms them into individual YAML files with consistent data structure. # Warnings are emitted for known irregularities not yet accommodated. -# Successful transformations may be written to `../docs/_items`, where they are -# available to Jekyll's website builder +# Successful transformations may be validated against a target schema and +# written to `../docs/_items`, where they are available to Jekyll's website +# builder import os import xmltodict import re import yaml +import json +import jsonschema # Top-level variables source = '../DIMEV_XML/Records.xml' destination = '../docs/_items/' +records_schema = '../schemas/records.json' test_sample= ['357', '2324', '2458', '2651', '2677', '6654'] warning_log = ['Warnings from the latest run of `transform-Records.py`.\n'] log_file = '../artefacts/warnings.txt' @@ -446,21 +450,15 @@ def warn(warning_type, field, parent_field, dimev_id, data): def validate_yaml(dimev_id, conversion): print(f'Validating YAML conversion for DIMEV {dimev_id}...') - #TODO: Validate against the specific target, not just general syntax - string = '\n'.join(conversion) + with open(records_schema) as f: + schema = json.load(f) try: - yaml.safe_load(string) + jsonschema.validate(instance=conversion, schema=schema) return True - except yaml.YAMLError as e: + except jsonschema.ValidationError as e: print(e) return False -def yaml_dump(new_record): - yml_out = yaml.dump(new_record, sort_keys=False, allow_unicode=True) - print('---') - print(yml_out) - print('---') - def write_to_file(dimev_id, conversion): output_filename = destination + '0' * (4 - len(dimev_id)) + dimev_id + '.md' yml_out = yaml.dump(conversion, sort_keys=False, allow_unicode=True) @@ -506,7 +504,10 @@ def write_to_file(dimev_id, conversion): if len(dimev_id) == 0: dimev_id = '2677' conversion = transform_item(dimev_id) - yaml_dump(conversion) + # Print the converted record to terminal + print('---') + print(yaml.dump(conversion, sort_keys=False, allow_unicode=True)) + print('---') prompt = f'Validate this conversion and write it to `{destination}`? (Y/n) ' options = ['y', 'n', ''] decision = get_valid_input(prompt, options)