Skip to content

Commit

Permalink
create a schema for validation of new record items
Browse files Browse the repository at this point in the history
  • Loading branch information
icornelius committed May 28, 2024
1 parent 7ff7eae commit 8be4999
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 13 deletions.
144 changes: 144 additions & 0 deletions schemas/records.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
{
"type": "object",
"properties": {
"DIMEV": {"type": "string"},
"IMEV": {"type": "string"},
"NIMEV": {"type": "string"},
"itemIncipit": {"type": "string"},
"description": {"type": "string"},
"descNote": {"type": "string"},
"authors": {
"type": "array",
"items": {
"type": "object",
"properties": {
"lastName": {"type": "string"},
"firstName": {"type": "string"},
"suffix": {"type": "string"},
"key": {"type": "string"}
},
"required": ["lastName", "firstName", "suffix", "key"]
}
},
"itemTitles": {
"type": "array",
"items": {"type": "string"}
},
"subjects": {
"type": "array",
"items": {"type": "string"}
},
"verseForms": {
"type": "array",
"items": {"type": "string"}
},
"versePatterns": {
"type": "array",
"items": {"type": "string"}
},
"languages": {
"type": "array",
"items": {"type": "string"}
},
"ghosts": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"note": {"type": "string"}
},
"required": ["key", "note"]
}
},
"witnesses": {
"type": "array",
"items": {
"type": "object",
"properties": {
"wit_id": {"type": "integer"},
"illust": {"type": "string"},
"music": {"type": "string"},
"allLines": {"type": "string"},
"firstLines": {"type": "string"},
"lastLines": {"type": "string"},
"sourceKey": {"type": "string"},
"point_locators": {
"type": "object",
"properties": {
"prefix": {"type": "string"},
"range": {
"type": "array",
"items": {
"type": "object",
"properties": {
"start": {"type": "string"},
"end": {"type": "string"}
},
"required": ["start", "end"]
}
}
},
"required": ["prefix", "range"]
},
"note": {"type": "string"},
"MSAuthor": {"type": "string"},
"MSTitle": {"type": "string"},
"facsimiles": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"point_locators": {"type": "string"}
},
"required": ["key", "point_locators"]
}
},
"editions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {"type": "string"},
"point_locators": {"type": "string"}
},
"required": ["key", "point_locators"]
}
}
},
"required": [
"wit_id",
"illust",
"music",
"allLines",
"firstLines",
"lastLines",
"sourceKey",
"point_locators",
"note",
"MSAuthor",
"MSTitle",
"facsimiles",
"editions"
]
}
}
},
"required": [
"DIMEV",
"IMEV",
"NIMEV",
"itemIncipit",
"description",
"descNote",
"authors",
"itemTitles",
"subjects",
"verseForms",
"versePatterns",
"languages",
"ghosts",
"witnesses"
]
}
27 changes: 14 additions & 13 deletions scripts/transform-Records.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
# This script extracts a small sample of item records from Records.xml and
# transforms them into individual YAML files with consistent data structure.
# Warnings are emitted for known irregularities not yet accommodated.
# Successful transformations may be written to `../docs/_items`, where they are
# available to Jekyll's website builder
# Successful transformations may be validated against a target schema and
# written to `../docs/_items`, where they are available to Jekyll's website
# builder.

import os
import xmltodict
import re
import yaml
import json
import jsonschema

# Top-level variables
# Path of the XML file the item records are extracted from
source = '../DIMEV_XML/Records.xml'
# Directory that transformed records are written to
destination = '../docs/_items/'
# Path of the JSON schema that converted records are validated against
records_schema = '../schemas/records.json'
# Identifiers of the sample records (presumably DIMEV numbers -- confirm)
test_sample = [
    '357',
    '2324',
    '2458',
    '2651',
    '2677',
    '6654',
]
# Collected warning lines; the first entry is a header for the log
warning_log = ['Warnings from the latest run of `transform-Records.py`.\n']
# Path of the warnings log (presumably where `warning_log` is saved -- confirm)
log_file = '../artefacts/warnings.txt'
Expand Down Expand Up @@ -446,21 +450,15 @@ def warn(warning_type, field, parent_field, dimev_id, data):

def validate_yaml(dimev_id, conversion):
    """Validate a converted record against the records JSON schema.

    The schema is read from the path held in the module-level
    `records_schema` variable and the record is checked with
    `jsonschema.validate`.

    Args:
        dimev_id: Identifier of the record; used only in the progress message.
        conversion: The converted record to check against the schema.

    Returns:
        True if the record conforms to the schema. False if
        `jsonschema.ValidationError` is raised, in which case the error is
        printed.
    """
    print(f'Validating YAML conversion for DIMEV {dimev_id}...')
    # Read the schema with an explicit encoding so the result does not depend
    # on the platform's default locale.
    with open(records_schema, encoding='utf-8') as f:
        schema = json.load(f)
    # Keep the `try` body to the single call that can raise ValidationError.
    try:
        jsonschema.validate(instance=conversion, schema=schema)
    except jsonschema.ValidationError as e:
        print(e)
        return False
    return True

def yaml_dump(new_record):
    """Print `new_record` serialized as YAML between two `---` lines."""
    rendered = yaml.dump(new_record, sort_keys=False, allow_unicode=True)
    # Emit the opening marker, the YAML text, and the closing marker in order.
    for chunk in ('---', rendered, '---'):
        print(chunk)

def write_to_file(dimev_id, conversion):
output_filename = destination + '0' * (4 - len(dimev_id)) + dimev_id + '.md'
yml_out = yaml.dump(conversion, sort_keys=False, allow_unicode=True)
Expand Down Expand Up @@ -506,7 +504,10 @@ def write_to_file(dimev_id, conversion):
if len(dimev_id) == 0:
dimev_id = '2677'
conversion = transform_item(dimev_id)
yaml_dump(conversion)
# Print the converted record to terminal
print('---')
print(yaml.dump(conversion, sort_keys=False, allow_unicode=True))
print('---')
prompt = f'Validate this conversion and write it to `{destination}`? (Y/n) '
options = ['y', 'n', '']
decision = get_valid_input(prompt, options)
Expand Down

0 comments on commit 8be4999

Please sign in to comment.