From f324c7313d2b8685f58858eac6cc6647df3e9e7d Mon Sep 17 00:00:00 2001 From: Svetlana Yatsyk <78039821+Svetlana-Yatsyk@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:29:27 +0100 Subject: [PATCH 1/4] Create distinguo-GT-metadata.yml --- .../distinguo-GT-metadata.yml | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml diff --git a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml new file mode 100644 index 0000000..9ce2264 --- /dev/null +++ b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml @@ -0,0 +1,45 @@ + + +schema: https://htr-united.github.io/schema/2023-06-27/schema.json +title: >- + DISTINGUO : Ground truth for Handwritten Text Recognition (HTR) on Collections + of Distinctions (late 13th to late 15th century) +url: https://nakala.fr/10.34847/nkl.48ad8b8d +authors: + - name: Svetlana + surname: Yatsyk + orcid: 0000-0001-5356-7746 + roles: + - transcriber + - aligner + - name: Marjorie + surname: Burghart + orcid: 0000-0001-7754-4389 +institutions: [] +description: >- + This dataset contains normalized transcriptions of collections of + distinctions, specifically "Summa de abstinentia" by Nicolas of Biard and + "Dictionarium bovis" by Thomas of Pavia. They were prepared as part of the + DISTINGUO project, dedicated to the study of distinctiones in medieval Latin + preaching and led by Marjorie Burghart in 2019-2024. +project-website: https://distinguo.huma-num.fr/ +language: + - lat +production-software: eScriptorium + Kraken +automatically-aligned: false +script: + - iso: Latn +script-type: only-manuscript +time: + notBefore: '1250' + notAfter: '1499' +hands: + count: 1-per-folder + precision: estimated +license: + name: CC-BY 4.0 + url: https://creativecommons.org/licenses/by/4.0/ +format: Page-XML +volume: + - metric: lines + count: 15190 From c7f462b6c8a252c4aff0e25568b7fdf626ef1be2 Mon Sep 17 00:00:00 2001 From: Svetlana Yatsyk <78039821+Svetlana-Yatsyk@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:40:40 +0100 Subject: [PATCH 2/4] Update distinguo-GT-metadata.yml --- .../distinguo-GT-metadata.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml index 9ce2264..309dff6 100644 --- a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml +++ b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml @@ -12,9 +12,6 @@ authors: roles: - transcriber - aligner - - name: Marjorie - surname: Burghart - orcid: 0000-0001-7754-4389 institutions: [] description: >- This dataset contains normalized transcriptions of collections of @@ -23,8 +20,7 @@ description: >- DISTINGUO project, dedicated to the study of distinctiones in medieval Latin preaching and led by Marjorie Burghart in 2019-2024. project-website: https://distinguo.huma-num.fr/ -language: - - lat +language: [] production-software: eScriptorium + Kraken automatically-aligned: false script: @@ -43,3 +39,10 @@ format: Page-XML volume: - metric: lines count: 15190 + - metric: lines + count: 15190 + - metric: lines + count: 15190 + - metric: pages + count: 318 + From d5c9612ae63b2c9c2586019a4b5e38dfe8a8254b Mon Sep 17 00:00:00 2001 From: Svetlana Yatsyk <78039821+Svetlana-Yatsyk@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:46:11 +0100 Subject: [PATCH 3/4] Update distinguo-GT-metadata.yml --- .../distinguo-GT-metadata.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml index 309dff6..77aae5f 100644 --- a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml +++ b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml @@ -39,10 +39,10 @@ format: Page-XML volume: - metric: lines count: 15190 - - metric: lines - count: 15190 - - metric: lines - count: 15190 + - metric: characters + count: 682486 + - metric: regions + count: 1076 - metric: pages count: 318 From 6e37f11852e1049f27b9357b9b3f2530b6f15b1f Mon Sep 17 00:00:00 2001 From: Svetlana Yatsyk <78039821+Svetlana-Yatsyk@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:50:30 +0100 Subject: [PATCH 4/4] added APA reference --- .../distinguo-GT-metadata.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml index 77aae5f..de49e1d 100644 --- a/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml +++ b/catalog/distinguo-ground-truth-for-handwritten-text-recognition-htr-on-collections-of-distinctions-late-13th-to-late-15th-century/distinguo-GT-metadata.yml @@ -36,6 +36,13 @@ license: name: CC-BY 4.0 url: https://creativecommons.org/licenses/by/4.0/ format: Page-XML +sources: + - reference: >- + Yatsyk, S. (2024). DISTINGUO : Ground truth for Handwritten Text + Recognition (HTR) on Collections of Distinctions (late 13th to late 15th + century) (Version 1) [Data set]. NAKALA - https://nakala.fr (Huma-Num - + CNRS). + link: https://doi.org/10.34847/NKL.48AD8B8D volume: - metric: lines count: 15190