diff --git a/.github/workflows/auto-updates.yml b/.github/workflows/auto-updates.yml index 7b86f09c..d55af8a9 100644 --- a/.github/workflows/auto-updates.yml +++ b/.github/workflows/auto-updates.yml @@ -22,6 +22,7 @@ jobs: - "de" - "el" - "en" + - "eo" - "es" - "fr" - "it" diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index d2c62cef..3060ddb7 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -19,8 +19,9 @@ jobs: - "ca" - "da" - "de" - # - "el" + - "el" - "en" + - "eo" - "es" - "fr" - "it" diff --git a/README.md b/README.md index 6610f5b7..8753e257 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ 1. `[DE]` [German](docs/de/README.md) 1. `[EL]` [Greek](docs/el/README.md) 1. `[EN]` [English](docs/en/README.md) +1. `[EO]` [Esperanto](docs/eo/README.md) 1. `[ES]` [Spanish](docs/es/README.md) 1. `[FR]` [French](docs/fr/README.md) ([news](https://www.mobileread.com/forums/showthread.php?t=330223&page=2)) 1. `[IT]` [Italian](docs/it/README.md) diff --git a/docs/eo/README.md b/docs/eo/README.md new file mode 100644 index 00000000..414ddbfb --- /dev/null +++ b/docs/eo/README.md @@ -0,0 +1,20 @@ +# Esperanto Dictionary For Your eBook Reader + +**eBook Reader Dictionaries** is a collaborative project using the vast [Wiktionary](https://eo.wiktionary.org/) database to create a full dictionary, updated on a daily basis. + +## Interesting Features + +- [Kobo] When selecting a plural word, its singular form will be displayed. +- [Kobo] When selecting a conjugated verb, its infinitive version will be displayed. → [ate](screenshot-ate.png) +- [Kobo] When a word is the plural form of another, and also a form of a verb, all versions are displayed. +- If a word contains several pronunciations, or genders, all are available. → [board](screenshot-board.png) +- Mathematic formulas are converted to SVG. → [Pythagorean trigonometric identity](screenshot-pythagorean_trigonometric_identity.png) +- Hieroglyphs are supported. 
→ [tjaty](screenshot-tjaty.png) + +## Downloads + +https://github.com/BoboTiG/ebook-reader-dict/releases/tag/eo + +## Installation + +Copy the dictionary inside the `.kobo/custom-dict/` folder on your eBook reader. diff --git a/docs/eo/screenshot-ate.png b/docs/eo/screenshot-ate.png new file mode 100644 index 00000000..6b5e9934 Binary files /dev/null and b/docs/eo/screenshot-ate.png differ diff --git a/docs/eo/screenshot-board.png b/docs/eo/screenshot-board.png new file mode 100644 index 00000000..e21cd72f Binary files /dev/null and b/docs/eo/screenshot-board.png differ diff --git a/docs/eo/screenshot-pythagorean_trigonometric_identity.png b/docs/eo/screenshot-pythagorean_trigonometric_identity.png new file mode 100644 index 00000000..bd9411c7 Binary files /dev/null and b/docs/eo/screenshot-pythagorean_trigonometric_identity.png differ diff --git a/docs/eo/screenshot-tjaty.png b/docs/eo/screenshot-tjaty.png new file mode 100644 index 00000000..d4923708 Binary files /dev/null and b/docs/eo/screenshot-tjaty.png differ diff --git a/scripts/__main__.py b/scripts/__main__.py index dbcd790d..dce91728 100644 --- a/scripts/__main__.py +++ b/scripts/__main__.py @@ -22,6 +22,8 @@ "en-labels.py": "wikidict/lang/en/labels.py", "en-langs.py": "wikidict/lang/en/langs.py", "en-places.py": "wikidict/lang/en/places.py", + "eo-langs.py": "wikidict/lang/eo/langs.py", + "eo-tags.py": "wikidict/lang/eo/tags.py", "es-langs.py": "wikidict/lang/es/langs.py", "es-campos-semanticos.py": "wikidict/lang/es/campos_semanticos.py", "fr-domain-templates.py": "wikidict/lang/fr/domain_templates.py", diff --git a/scripts/all-namespaces.py b/scripts/all-namespaces.py index 81dca597..44aa43ac 100644 --- a/scripts/all-namespaces.py +++ b/scripts/all-namespaces.py @@ -6,7 +6,8 @@ ids = {6, 14} # File, and Category results: dict[str, list[str]] = {} -locales = ("ca", "da", "de", "el", "en", "es", "fr", "it", "no", "pt", "ro", "ru", "sv") +# XXX_LOCALES +locales = ("ca", "da", "de", "el", "en", "eo", 
"""Print the Esperanto `tags` mapping extracted from the `Modulo:mtagg` Lua module."""

import re

from scripts_utils import get_content

text = get_content("https://eo.wiktionary.org/wiki/Modulo:mtagg?action=raw")

# Special treatment: one key is written in Lua as `"ava"..string.char(197,173)`.
# Lua's string.char() emits raw bytes, and 197,173 (0xC5 0xAD) is the UTF-8
# encoding of "ŭ". Decode the byte pair as UTF-8 rather than mapping each
# number to its own code point, which produced the mojibake "Å" + soft hyphen.
u_breve = bytes((197, 173)).decode("utf-8")
text = text.replace('"ava"..string.char(197,173)', f'"ava{u_breve}"')

# Uniformize maps
# contabtt ['ASKI'] = 'askia signo' → contabtt["ASKI"] = "askia signo"
text = re.sub(r"contabtt \['([^']+)'\] = '([^'|]+)'", r'contabtt["\1"] = "\2"', text)

# Every `contab…["key"] = "value` assignment becomes one (key, value) pair;
# the value stops at the first double quote or pipe.
tags = re.findall(r'^\s+contab\w+\s*\["([^"]+)"\]\s*=\s*"([^"|]+)', text, flags=re.MULTILINE)
print("tags = {")
for key, value in sorted(tags):
    print(f' "{key}": "{value}",')
print(f"}} # {len(tags):,}")
+ +==={{Vortospeco|persona nomo|eo}}, {{g|m}}=== + +{{Ekzemploj}} +:[1] '''''Teodoriko''''', dirita la Granda, estis reĝo de ostrogotoj ekde 474 kaj reĝo de Italujo ekde 493 ĝis 526, dua ĝermana reĝo de Romo. + +===={{Tradukoj}}==== +# [[Théodoric]], [[Thierry]]. diff --git a/tests/data/eo/alkazabo.wiki b/tests/data/eo/alkazabo.wiki new file mode 100644 index 00000000..d7f9fc94 --- /dev/null +++ b/tests/data/eo/alkazabo.wiki @@ -0,0 +1,24 @@ +{{Vorto de la semajno|45|2011}}{{Vorto de la semajno|45|2012}}{{Vorto de la semajno|45|2013}} + +=={{Lingvo|eo}}== + +==={{Vortospeco|substantivo|eo}}=== +{{Deklinacio-eo}} +{{bildodek|Almeria Alcazaba (fcm).jpg|'''alkazabo''' de Almería}} + +{{Deveno}} +: el la [[andalus-araba]] ''[[alqaṣába]]'', kaj tiu ĉi el la klasika [[araba]] ''[[qaṣabah]]'', [[قصبة]] + +===={{Signifoj}}==== +# {{k|eo|F: historio|arkitekturo|militado}} fortikita konstruaĵaro; [[citadelo]] [[aŭ]] [[palaco]] [[de]] [[araba]] [[ĉefo]] en [[Nord-Afriko]] kaj [[Suda-Hispanio]] [http://vortaro.net/#Alkazabo Alkazabo en vortaro.net] + +===={{Tradukoj}}==== +{{trad-eko}} +* franca: {{t|fr|alcazaba|f}}, {{t|fr|citadelle|f}}, casbah +* germana: {{t|de|Alcazaba|f}}, {{t|de|Festung|f}} +{{trad-mezo}} +* hispana: {{t|es|alcazaba|f}} +* itala: {{t|it|alcazaba|f}}, {{t|it|cittadella|f}} +{{trad-fino}} + +{{Referencoj}} diff --git a/tests/data/eo/ekamus.wiki b/tests/data/eo/ekamus.wiki new file mode 100644 index 00000000..1cf78aba --- /dev/null +++ b/tests/data/eo/ekamus.wiki @@ -0,0 +1,4 @@ +=={{Lingvo|eo}}== +==={{Vortospeco|verba formo|eo}}=== + +{{form-eo}} diff --git a/tests/data/eo/kaskedo.wiki b/tests/data/eo/kaskedo.wiki new file mode 100644 index 00000000..60c35a0c --- /dev/null +++ b/tests/data/eo/kaskedo.wiki @@ -0,0 +1,40 @@ +=={{Lingvo|eo}}== +{{8}} +==={{Vortospeco|substantivo|eo}}=== +[[Dosiero:Casquette a helice.jpg|thumb|[1] '''''kaskedo''''']] +{{Deklinacio-eo}} + +{{Vorterseparo}} +:kasked/o. 
+ +===={{Signifoj}}==== +:[1] Ĉapo kun viziero, civilvesta aŭ uniforma: ''homoj armitaj en nigraj kaskedetoj{{ref-Grabowski}}; la hotela pordisto levis sian kaskedon.'' + +{{Ekzemploj}} +:[1] Volis +:la ĝardenisto pajlan ĉapelon, +:la ŝipisto maristan ĉapon, +:la ĵurnalisto kvadratitan ''kaskedon'', +:la ĉambristino puntan kufon, +:la ĝendarmo militistan kepon, +:la ĉampano [[korko]]n, +:la ĝemelo du supersignojn, +:la ĵonglisto tri aŭ kvar...{{La milito de la ĉapeloj}} + +===={{Tradukoj}}==== +{{trad-eko}} +*dana: [1] {{t|da|kasket}} {{g|u}} + +*franca: [1] {{t|fr|casquette}} {{g|f}} +{{trad-mezo}} + +{{trad-fino}} + +{{Referencoj}} +* {{vikipedenlin|kaskedo}} +* {{ref-ReVo|kaskedo}} +* {{ref-PIV}} +* {{ref-Tato|eo}} +* {{ref-Simpla Vortaro|kaskedo}} + +{{Fontoj}} diff --git a/tests/data/eo/komputilo.wiki b/tests/data/eo/komputilo.wiki new file mode 100644 index 00000000..377bb395 --- /dev/null +++ b/tests/data/eo/komputilo.wiki @@ -0,0 +1,37 @@ +{{vikipedio}} + +== Esperanto == +=== Substantivo === +{{livs|eo|SB|fra=[L:komput(i)]+[I:il]+[U:o]}} +{{Deklinacio-eo}} +{{bildodek|ThinkCentre_S50.jpg}} +{{bildodek|ENIAC-changing_a_tube.jpg|tre granda kaj malnova '''komputilo'''|Raspberry_Pi_4_Model_B_-_Side.jpg|malgranda nova '''komputilo'''}} + +===={{Signifoj}}==== +# {{k|eo|F: komputado}} [[maŝino]] aŭ [[elektronikaĵo]] kiu kapablas [[kalkuli]], precipe sen intervenoj de homoj, aŭ rapide trakti, stori, kaj preni larĝajn kvantojn de [[datumo]] + +{{Sinonimoj}} +''(arkaikaj kaj evitendaj)'' [[komputero]], [[komputoro]], [[komputatoro]] + +===={{Tradukoj}}==== +{{trad-eko}} +* angla: {{t|en|computer}} +* finna: {{t|fi|tietokone}} +* franca: '''1., 2.''' {{t|fr|calculatrice}} {{g|f}}, {{t|fr|ordinateur}} {{g|m}} +* germana: '''1., 2.''' {{t|de|Computer}} {{g|m}}, {{t|de|Rechner}} {{g|m}} +* hispana: '''1., 2.''' {{t|es|ordenador}} {{g|m}}, {{t|es|computadora}} {{g|f}}, {{t|es|computador}} {{g|m}} +* indonezia: {{t|id|komputer}} +* kroata: '''1., 2.''' {{t|hr|računalo}}, 
{{t|hr|kompjuter}} +{{trad-mezo}} +* nederlanda: '''1., 2.''' {{t|nl|computer|m}} +* pola: '''1., 2.''' {{t|pl|komputer}} {{g|m}} +* portugala: '''1., 2.''' {{t|pt|computador}} {{g|m}} +* rusa: {{t|ru|компьютер|m}}, {{t|ru|ЭВМ|f}} +* sveda: {{t|sv|dator|u}} +{{trad-fino}} + +{{Referencoj}} +* {{ref-PIV}} +* {{ref-Simpla Vortaro|{{PAGENAME}}}} +* {{ref-Majstro|{{PAGENAME}}}} +* {{ref-Tato|eo}} diff --git a/tests/data/eo/latina.wiki b/tests/data/eo/latina.wiki new file mode 100644 index 00000000..52f0391d --- /dev/null +++ b/tests/data/eo/latina.wiki @@ -0,0 +1,20 @@ +=={{Lingvo|eo}}== + +{{Etimologio}} +De [[Latino]] + +=== Adjektivo === +#rilata al [[Latino]]. + +===={{Tradukoj}}==== +{{trad-eko}} +* angla: {{t|en|Latin}} +* franca: {{t|fr|latin}} +{{trad-mezo}} +* germana: {{t|de|lateinisch}} +{{trad-fino}} + +{{Referencoj}} +* {{ref-Majstro|latina}} + +[[Kategorio:Radiko latin']] diff --git a/tests/data/eo/luko.wiki b/tests/data/eo/luko.wiki new file mode 100644 index 00000000..c17cb0ef --- /dev/null +++ b/tests/data/eo/luko.wiki @@ -0,0 +1,55 @@ +=={{Lingvo|eo}}== +{{8}} +==={{Vortospeco|substantivo|eo}}=== + +{{Deklinacio-eo}} + +{{Deveno}} +:el la germana ''[[Luke]]'' + +{{Vorterseparo}} +:luk/o. + +===={{Signifoj}}==== +:Aperturo: +:[1] ordinare vitrita aŭ kradita, en tegmento, plafono aŭ kelo, por enlasi lumon: ''mansarda luko''. +:[2] fermebla per pordo aŭ tabuloj, en la ferdeko de ŝipo, por ebligi penetron en la holdon (pli precize: holdluko). +:[3] fermita per kovrilo el giso, kiu en la strato, sur trotuaro ks ebligas al metiisto malsupreniri en kloakon, aŭ subteran galerion. + +{{Sinonimoj}} +:[1] lumluko, bovokulo, vazistaso. + +{{Derivaĵoj}} +:[1] + +{{Derivaĵoj}} +:[[luketo]]. Malgranda luko en pordo tra kiu oni povas ekvidi la personon, kiu sonoris. +:[[lumluko]]. luko 1. 
+ +===={{Tradukoj}}==== +{{trad-eko}} +*angla: [1] {{t|en|bull's eye}}, {{t|en|porthole}}, port hole +*franca: [1] {{t|fr|lucarne}} {{g|f}}, {{t|fr|sabord}} {{g|m}} +*germana: [1] {{t|de|Bullauge}} {{g|n}}, {{t|de|Dachfenster}}, {{t|de|Deckenfenster}}, {{t|de|Mansardenfenster}}, {{t|de|Luke}}, {{t|de|Dachluke}} +*malaltgermana: [1] {{t|nds|Bulloog}} +*pola: [1] {{t|pl|bulaj}} {{g|m}}, {{t|pl|iluminator}} {{g|m}}, {{t|pl|świetlik}} {{g|m}} +{{trad-mezo}} +*portugala: [1] {{t|pt|vigia}} {{g|f}} +*hispana: [1] {{t|es|ojo de buey}}, {{t|es|escudilla}} +*sveda: [1] {{t|sv|ventil}}, {{t|sv|oxöga}} (veraltet), {{t|sv|fönsterventil}} {{g|u}} + +{{trad-fino}} + +{{Referencoj}} +{{vikipedio|luko}} +* {{ref-ReVo|luko}} +* {{ref-PIV}} +* {{ref-Tato|eo}} +* {{ref-Simpla Vortaro|luko}} + +{{Fontoj}} + +{{Similaĵoj}} +: + +[[Kategorio:Dubinda (Esperanto)]] diff --git "a/tests/data/eo/\342\231\215.wiki" "b/tests/data/eo/\342\231\215.wiki" new file mode 100644 index 00000000..575d0da1 --- /dev/null +++ "b/tests/data/eo/\342\231\215.wiki" @@ -0,0 +1,9 @@ +{{character info}} +=={{Lingvo|mul}}== +==={{Vortospeco|signo|mul}}=== + +===={{Signifoj}}==== +# {{k|mul|F: astrologio}} zodiaka signo de [[Virgulino]] (''[[Virgo]]'') + +{{nevideblafinodesekcio}} +{{emojibox}} diff --git "a/tests/data/eo/\360\237\222\200.wiki" "b/tests/data/eo/\360\237\222\200.wiki" new file mode 100644 index 00000000..17fa54f6 --- /dev/null +++ "b/tests/data/eo/\360\237\222\200.wiki" @@ -0,0 +1,8 @@ +== translingva == +=== Signo === +{{livs|mul|KK}} +{{signo|des=SKULL}} +{{k|mul|T: BILD.}} + +===={{Signifoj}}==== +# morto diff --git a/tests/test_eo.py b/tests/test_eo.py new file mode 100644 index 00000000..f9f798f0 --- /dev/null +++ b/tests/test_eo.py @@ -0,0 +1,124 @@ +from collections.abc import Callable + +import pytest + +from wikidict.render import parse_word +from wikidict.stubs import Definitions +from wikidict.utils import process_templates + + +@pytest.mark.parametrize( + "word, pronunciations, genders, 
etymology, definitions, variants", + [ + ( + "♍", + [], + [], + [], + ["(astrologio) zodiaka signo de Virgulino (Virgo)"], + [], + ), + ( + "💀", + [], + [], + [], + ["morto"], + [], + ), + ( + "alkazabo", + [], + [], + ["el la andalus-araba alqaṣába, kaj tiu ĉi el la klasika araba qaṣabah, قصبة"], + [ + "(historio; arkitekturo; militado) fortikita konstruaĵaro; citadelo aŭ palaco de araba ĉefo en Nord-Afriko kaj Suda-Hispanio" + ], + [], + ), + ( + "ekamus", + [], + [], + [], + [], + ["ekami"], + ), + ( + "kaskedo", + ["kasked/o"], + [], + [], + [ + "Ĉapo kun viziero, civilvesta aŭ uniforma: homoj armitaj en nigraj kaskedetoj; la hotela pordisto levis sian kaskedon." + ], + [], + ), + ( + "komputilo", + [], + [], + [], + [ + "(komputado) maŝino aŭ elektronikaĵo kiu kapablas kalkuli, precipe sen intervenoj de homoj, aŭ rapide trakti, stori, kaj preni larĝajn kvantojn de datumo" + ], + [], + ), + ( + "latina", + [], + [], + ["De Latino"], + ["rilata al Latino."], + [], + ), + ( + "luko", + ["luk/o"], + [], + ["el la germana Luke"], + [ + "ordinare vitrita aŭ kradita, en tegmento, plafono aŭ kelo, por enlasi lumon: mansarda luko.", + "fermebla per pordo aŭ tabuloj, en la ferdeko de ŝipo, por ebligi penetron en la holdon (pli precize: holdluko).", + "fermita per kovrilo el giso, kiu en la strato, sur trotuaro ks ebligas al metiisto malsupreniri en kloakon, aŭ subteran galerion.", + ], + [], + ), + ( + "Teodoriko", + [], + ["m"], + ["El la itala."], + [], + [], + ), + ], +) +def test_parse_word( + word: str, + pronunciations: list[str], + genders: list[str], + etymology: list[Definitions], + definitions: list[Definitions], + variants: list[str], + page: Callable[[str, str], str], +) -> None: + """Test the sections finder and definitions getter.""" + code = page(word, "eo") + details = parse_word(word, code, "eo", force=True) + assert pronunciations == details.pronunciations + assert genders == details.genders + assert etymology == details.etymology + assert definitions 
== details.definitions + assert variants == details.variants + + +@pytest.mark.parametrize( + "wikicode, expected", + [ + ("", ""), + ], +) +def test_process_template(wikicode: str, expected: str) -> None: + """Test templates handling.""" + assert process_templates("foo", wikicode, "eo") == expected diff --git a/wikidict/check_word.py b/wikidict/check_word.py index 12b75de5..e008d7db 100644 --- a/wikidict/check_word.py +++ b/wikidict/check_word.py @@ -155,6 +155,11 @@ def filter_html(html: str, locale: str) -> str: if a["href"].lower().startswith(("#cite", "#mw")): a.decompose() + elif locale == "eo": + # + for a in bs.find_all("sup", {"class": "reference"}): + a.decompose() + elif locale == "es": # Replace color rectangle for span in bs.find_all("span", {"id": "ColorRect"}): diff --git a/wikidict/lang/eo/__init__.py b/wikidict/lang/eo/__init__.py new file mode 100644 index 00000000..f6c04b25 --- /dev/null +++ b/wikidict/lang/eo/__init__.py @@ -0,0 +1,304 @@ +"""Esperanto language.""" + +# Float number separator +import re + +from ...user_functions import uniq + +float_separator = "," + +# Thousands separator +thousands_separator = " " + +# Markers for sections that contain interesting text to analyse. 
+section_patterns = ("#", r":\[\d+\]", r"\*") +section_sublevels = (3, 4) +head_sections = ("{{lingvo|eo}}", "{{lingvo|mul}}", "esperanto", "multldingva", "translingva") +etyl_section = ("{{deveno}}", "{{etimologio}}") +sections = ( + *etyl_section, + "adjektivo", + "adverbo", + "difinoj", + "infikso", + "interjekcio", + "konjunkcio", + "malllongigo", + "mallongigoj", + "numeralo", + "prefikso", + "prepozicio", + "pronomo", + "radiko", + "signifo", + "signo", + "subjunkcio", + "substantivo", + "sufikso", + "verba formo", + "verbo", + "{{signifoj}", + "{{vortospeco|adjektiva formo|eo}", + "{{vortospeco|adjektivo|eo}", + "{{vortospeco|adverbo|eo}", + "{{vortospeco|antaŭfiksaĵo|eo}", + "{{vortospeco|artikolo|eo}", + "{{vortospeco|demanda adverbo|eo}", + "{{vortospeco|esprimo|eo}", + "{{vortospeco|finaĵo|eo}", + "{{vortospeco|frazo|eo}", + "{{vortospeco|interjekcio|eo}", + "{{vortospeco|konjunkcio|eo}", + "{{vortospeco|liternomo|eo}", + "{{vortospeco|litero|eo}", + "{{vortospeco|literoparo|eo}", + "{{vortospeco|loknomo|eo}", + "{{vortospeco|mallongigo|eo}", + "{{vortospeco|mallongigo|mul}", + "{{vortospeco|mona nomo|eo}", + "{{vortospeco|nombro|eo}", + "{{vortospeco|nomo|eo}", + "{{vortospeco|numeralo|eo}", + "{{vortospeco|partikulo|eo}", + "{{vortospeco|persona nomo|eo}", + "{{vortospeco|persona pronomo|eo}", + "{{vortospeco|poseda pronomo|eo}", + "{{vortospeco|postfiksaĵo|eo}", + "{{vortospeco|prepozicio|eo}", + "{{vortospeco|pronomo|eo}", + "{{vortospeco|scienca nomo|mul}", + "{{vortospeco|signo|mul}", + "{{vortospeco|simbolo|eo}", + "{{vortospeco|simbolo|mul}", + "{{vortospeco|subjunkcio|eo}", + "{{vortospeco|substantiva formo|eo}", + "{{vortospeco|substantivo|eo}", + "{{vortospeco|substantivo|mul}", + "{{vortospeco|sufikso|eo}", + "{{vortospeco|verbo ambaŭtransitiva|eo}", + "{{vortospeco|verba formo|eo}", + "{{vortospeco|verbo|eo}", + "{{vortospeco|verbo netransitiva|eo}", + "{{vortospeco|verbo transitiva|eo}", + "{{vortospeco|vortgrupo|eo}", +) + +# Variants 
+variant_titles = sections +variant_templates = ("{{form-eo}}",) + +# Some definitions are not good to keep (plural, gender, ... ) +definitions_to_ignore = ( + # + # For variants + # + "form-eo", +) + +# Templates to ignore: the text will be deleted. +templates_ignored = ( + "?", + "aŭdo", + "barileto", + "fundamenta", + "IFA", + "N", + "PRON", + "quote-book", + "quote-magazine", + "ref-AdE", + "ref-Grabowski", + "ref-Kalman", + "ref-PrV", + "ref-ReVo", + "radiofoniaj liternomoj", + "rima", + "vian", + "Vd", + "Vidu ankaŭ", + "W", + "X", +) + +# Templates more complex to manage. +templates_multi = { + # {{fina|o}} + "fina": "parts[1]", + # {{inte|o}} + "inte": "parts[1]", + # {{mems|du}} + "mems": "parts[1]", + # {{pref|mis}} + "pref": "parts[1]", + # {{radi|vort}} + "radi": "parts[1]", + # {{sufi|il}} + "sufi": "parts[1]", + # {{Vortospeco|mona nomo|eo}} + "Vortospeco": "capitalize(parts[1])", +} + +# Templates that will be completed/replaced using custom text. +templates_other = { + "🏠": "🏠 arkitekturo", + "🌄": "geografio", + "🍴": "gastronomia", + "📖": "📖 (presarto kaj libroj)", + "📷": "📷 fotografio kaj kinotekniko", + "⏚": "⏚ elektro kaj elektroteĥniko", + "✞": "✞ kristanismo", + "❤": "❤ korpostrukturo kaj histologio:", + "☆": "☆ belartoj", + "♉": "♉ bestologio", + "👥": "👥 komunuza senso", + "🍁": "🍁 herbiko", + "✈": "✈ aviado", + "♠": "♠ ludoj", + "☼": "☼ terscieco (inkl. mineral- kaj rokoscienco)", + "⚓": "⚓ marnavigado kaj ŝipoj", + "⚕": "(⚕ kuracscienco kaj kirurgio)", + "♜": "♜ historio", + "Λ": "Λ lingv.", + "⊕": "⊕ terologio (inkl. mineralogio kaj petrologio):", + "♧": "♧ beletro", + "Ⓣ": "Ⓣ (teknikoj [inkl. mekanikon kaj metalurgion])", + "☇": "☇ forkomunikoj (inkl. 
def find_genders(
    code: str,
    pattern: re.Pattern[str] = re.compile(r"{g\|(\w+)"),
) -> list[str]:
    """
    Return the gender codes captured from `{g|…` occurrences in *code*.

    The raw matches are handed to `uniq()` before being returned.

    >>> find_genders("")
    []
    >>> find_genders("{{g|m}}")
    ['m']
    """
    found = pattern.findall(code)
    return uniq(found)
def last_template_handler(template: tuple[str, ...], locale: str, word: str = "") -> str:
    """
    Fallback used by utils.py::transform() when no other template handler applied.

    Templates known to the Esperanto `template_handlers` module are rendered
    there; anything else is delegated to the default handler.

    >>> last_template_handler(["form-eo"], "eo", word="surdaj")
    'surda'
    """
    from .. import defaults
    from .template_handlers import lookup_template, render_template

    name = template[0]
    if not lookup_template(name):
        # No locale-specific renderer: defer to the shared implementation.
        return defaults.last_template_handler(template, locale, word=word)

    return render_template(word, template)
"bislama", + "bin": "edoa", + "bm": "bambara", + "bn": "bengala", + "bo": "tibeta", + "bqi": "baktiara", + "br": "bretona", + "bs": "bosna", + "btx": "karoa", + "bua": "burjata", + "bug": "buĝia", + "bzt": "Brithenigo", + "ca": "kataluna", + "cbk": "ĉavacana", + "ccc": "ĉamikura", + "cdo": "orientmina", + "ce": "ĉeĉena", + "ceb": "cebua", + "ch": "ĉamora", + "cho": "ĉakta", + "chr": "ĉeroka", + "cia": "ĉiaĉiaa", + "cjs": "ŝora", + "cjy": "ĵina", + "ckb": "sorana", + "cmn": "mandarena ĉina", + "co": "korsika", + "cop": "kopta", + "cr": "kria", + "crh": "krime-tatara", + "cs": "ĉeĥa", + "csb": "kaŝuba", + "cu": "malnovslava", + "cv": "ĉuvaŝa", + "cy": "kimra", + "da": "dana", + "de": "germana", + "diq": "zazaa dimlia", + "dlm": "dalmata", + "dsb": "malsuprasoraba", + "dtp": "dusuna", + "dum": "nederlanda mezepoka", + "dv": "maldiva", + "dz": "dzonka", + "ee": "evea", + "egy": "egipta antikva", + "el": "greka", + "en": "angla", + "enm": "meza angla", + "eo": "Esperanto", + "es": "hispana", + "et": "estona", + "eu": "eŭska", + "evn": "evenka", + "ext": "ekstremadura", + "fa": "persa", + "fi": "finna", + "fit": "tornivalfinna", + "fj": "fiĝia", + "fo": "feroa", + "fr": "franca", + "frm": "meza franca", + "fro": "malnovfranca", + "frr": "nordfrisa", + "fur": "friula", + "fy": "frisa", + "ga": "irlanda", + "gag": "gagauza", + "gan": "gana", + "gd": "skotgaela", + "gez": "geeza", + "gil": "kiribata", + "gl": "galega", + "gmh": "mezaltgermana", + "gml": "mezmalaltgermana", + "gn": "gvarania", + "goh": "malnovaltgermana", + "gor": "gorontala", + "got": "gota", + "grc": "greka antikva", + "gsw": "alzaca", + "gu": "guĝarata", + "gv": "manksa", + "ha": "haŭsa", + "hak": "hakaa", + "har": "harara", + "haw": "havaja", + "hbo": "antikva hebrea", + "he": "hebrea", + "hi": "hindia", + "hif": "fiĝia hindia", + "hit": "hitita", + "ho": "hirimotua", + "hr": "kroata", + "hsb": "suprasoraba", + "ht": "haitia", + "hu": "hungara", + "hy": "armena", + "hz": "herera", + "ia": "Interlingvao", 
+ "id": "indonezia", + "ie": "Okcidentalo", + "ifa": "amganadifuga", + "ig": "igba", + "ii": "siĉuana loloa", + "ik": "inupiaka", + "inh": "inguŝa", + "io": "Ido", + "is": "islanda", + "it": "itala", + "itl": "itelmena", + "iu": "inuktituta", + "ja": "japana", + "jbo": "Loĵbano", + "jv": "java", + "ka": "kartvela", + "kaa": "karakalpaka", + "kam": "kambaa", + "kck": "kalanga", + "ket": "keta", + "kg": "konga", + "kge": "komeringa", + "ki": "kikuja", + "kiu": "zazaa kirmanka", + "kj": "kuanjama", + "kjh": "ĥakasa", + "kk": "kazaĥa", + "kkv": "kangeana", + "kl": "gronlanda", + "km": "kmera", + "kn": "kanara", + "ko": "korea", + "koi": "komipermjaka", + "kr": "kanura", + "krc": "karaĉaja-balkara", + "krl": "karela", + "ks": "kaŝmira", + "ktu": "kituba", + "ku": "kurda", + "kum": "kumika", + "kv": "komia", + "kw": "kornvala", + "ky": "kirgiza", + "la": "latina", + "lad": "judhispana", + "lb": "luksemburga", + "lbe": "laka", + "ldn": "Laadano", + "lez": "lezga", + "lfn": "Elefeno", + "lg": "ganda", + "li": "limburga", + "ln": "lingala", + "lo": "laŭa", + "lt": "litova", + "lua": "lubakasa", + "luo": "Luo", + "lus": "miza", + "luy": "luĥja", + "lv": "latva", + "mad": "madura", + "mak": "makasara", + "mdf": "mokŝa", + "mg": "malagasa", + "mh": "marŝala", + "mi": "maoria", + "min": "minankabaŭa", + "mk": "makedona", + "ml": "malajala", + "mn": "mongola", + "mnc": "manĉura", + "mo": "moldava", + "mr": "marata", + "ms": "malaja", + "mt": "malta", + "mui": "musia", + "mul": "translingva", + "my": "birma", + "myv": "erzja", + "na": "naura", + "nah": "naŭatla", + "nan": "minnana", + "nap": "napola", + "nb": "norvega bukmola", + "nci": "klasika naŭatla", + "nd": "norda ndebela", + "nds": "platgermana", + "ne": "nepalia", + "new": "nevara", + "ng": "ndonga", + "niu": "niua", + "nl": "nederlanda", + "nn": "norvega nova", + "nnf": "ngainga", + "no": "norvega", + "nog": "nogaja", + "non": "malnovnordlanda", + "nov": "Novialo", + "nr": "suda ndebela", + "nv": "navaha", + "ny": 
"njanĝa", + "oc": "okcitana", + "ofs": "malnovfrisa", + "oj": "aniŝinabeka", + "olo": "olonec-karela", + "om": "oroma", + "omy": "malnovmalaja", + "or": "orija", + "orv": "antikva orientslava", + "orw": "orovina", + "os": "oseta", + "osi": "osinga", + "osn": "malnovsunda", + "osx": "malnovsaksa", + "pa": "panĝaba", + "pam": "pampanga", + "pap": "Papiamento", + "pdc": "pensilvanigermana", + "peo": "malnovpersa", + "pfl": "palatinata", + "pi": "palia", + "pl": "pola", + "plm": "palembanga", + "pox": "polaba", + "ppi": "paipaia", + "prg": "prusa", + "ps": "paŝtuna", + "pt": "portugala", + "pzn": "paranaga", + "qu": "keĉua", + "qua": "kvapava", + "quv": "sakapulteka", + "qya": "kvenja", + "qyp": "kviripia", + "raa": "dungmalia", + "rap": "rapanuia", + "rej": "reĝanga", + "rhg": "rohinĝa", + "rm": "romanĉa", + "rn": "burunda", + "ro": "rumana", + "rom": "romaa", + "ru": "rusa", + "rue": "rusina", + "rup": "arumana", + "rut": "rutula", + "rw": "ruanda", + "ryu": "okinava", + "sa": "Sanskrito", + "sah": "jakuta", + "sat": "santala", + "sc": "sarda", + "scn": "sicilia", + "sco": "skota", + "sd": "sinda", + "sdn": "sarda galuresa", + "se": "samea norda", + "sg": "sangoa", + "sga": "malnovirlanda", + "sgs": "ĵemajtia", + "sh": "serbokroata", + "si": "sinhala", + "sio": "Slovio", + "sip": "sikimesa", + "sjn": "sindara", + "sk": "slovaka", + "sl": "slovena", + "sm": "samoa", + "sma": "samea suda", + "sn": "ŝona", + "so": "somala", + "sq": "albana", + "sr": "serba", + "src": "sarda logudoresa", + "ss": "svazia", + "st": "sota", + "stq": "saterfrisa", + "su": "sunda", + "sud": "Solresolo", + "sux": "sumera", + "sv": "sveda", + "sva": "svana kartvela", + "sw": "svahila", + "szl": "silezia pola", + "ta": "tamila", + "tay": "atajala", + "tcs": "toresa kreola", + "te": "telugua", + "tet": "tetuna", + "tg": "taĝika", + "th": "taja", + "ti": "tigraja", + "tig": "tigrea", + "tk": "turkmena", + "tkn": "tokunoŝima", + "tkp": "tikopia", + "tl": "tagaloga", + "tlb": "tobela", + "tld": 
"talaŭda", + "tlh": "klingona", + "tn": "cvana", + "to": "tongana", + "tok": "Tokipono", + "tpi": "Tokpisino", + "tpw": "tupia antikva", + "tr": "turka", + "ts": "conga", + "tt": "tatara", + "tw": "akankasa", + "ty": "tahitia", + "tyv": "tuva", + "tzl": "talosa", + "tzm": "tamaziĥta", + "udm": "udmurta", + "ug": "ujgura", + "uga": "ugarita", + "uk": "ukraina", + "ur": "Urduo", + "uz": "uzbeka", + "vec": "venecia", + "vep": "vepsa", + "vi": "vjetnama", + "vo": "Volapuko", + "vor": "vora", + "vro": "voroa", + "wa": "valona", + "wam": "masaĉuseca", + "war": "varaja", + "win": "vinebaga", + "wls": "valisa", + "wo": "volofa", + "wrh": "viraĝura", + "wuu": "vua", + "wym": "vilamovica", + "xal": "kalmuka", + "xbo": "bolgara", + "xh": "kosa", + "xlc": "likia", + "xmm": "malaja manada", + "xno": "anglonormanda", + "xsr": "ŝerpa", + "xto": "orientotokaria", + "xum": "umbra", + "yaq": "jakia", + "yi": "jida", + "yo": "joruba", + "yrk": "neneca", + "ysr": "sirenika", + "yua": "jukatanmajaa", + "yue": "kantona", + "za": "ĝuanga", + "ze": "ligura", + "zh": "ĉina", + "zsm": "malajzia", + "zu": "zulua", + "zun": "zuna", + "zza": "zazaa", + "zzj": "zuoĵiangzuanga", +} # 411 +# END diff --git a/wikidict/lang/eo/tags.py b/wikidict/lang/eo/tags.py new file mode 100644 index 00000000..284acab6 --- /dev/null +++ b/wikidict/lang/eo/tags.py @@ -0,0 +1,59 @@ +""" +List of tags. +Auto-generated with `python -m scripts`. 
+""" + +# START +tags = { + "ARAB": "araba cifero", + "ASKI": "askia signo", + "ASKL": "askia litero", + "BILD": "bilda signo", + "CIRI": "cirila litero", + "CNKO": "cxina signo komuna", + "CNSI": "cxina signo simpligita", + "CNTR": "cxina signo tradicia", + "GRKO": "greka aux kopta signo", + "IFAS": "IFA-signo", + "JAKA": "japana signo kanjxia", + "KEMSI": "kemia simbolo", + "LATI": "latina litero", + "MATE": "matematika signo", + "MATSI": "matematika simbolo", + "ROMA": "roma cifero", + "TIBE": "tibeta signo", + "VALSI": "simbolo de valuto", + "VALU": "signo de valuto", + "VDT": "duoble transitiva verbo", + "VIT": "netransitiva", + "VOD": "transitiva verbo kaj objekto deviga", + "VRF": "refleksiva formo", + "VRG": "nepre refleksiva verbo", + "VRH": "kvazauxnepre refleksiva verbo", + "VTR": "transitiva", + "ajly": "-ly", + "avaÅ­": "-aux", + "bas": "baza vortoprovizo", + "deta": "derivajxo de tabelvorto", + "dyna": "loka dinamika", + "idi": "idioma vortgrupo", + "kofr": "kofrovorto", + "kole": "kolektiva", + "lokn": "loknomo", + "miks": "miksajxo", + "nutr": "utruma(n)", + "onom": "onomatopeo", + "par": "idioma vortoparo", + "pers": "persona nomo", + "plen-inv": "vorto de la plena inventaro de Kotapedia", + "plpl": "nurplurala", + "prov": "proverbo", + "sapl": "samformplurala", + "senk": "sen komparado", + "sing": "nursingulara", + "sla": "slanga", + "stat": "loka statika", + "tneu": "neuxtruma(t)", + "vul": "vulgara", +} # 50 +# END diff --git a/wikidict/lang/eo/template_handlers.py b/wikidict/lang/eo/template_handlers.py new file mode 100644 index 00000000..1e61fee1 --- /dev/null +++ b/wikidict/lang/eo/template_handlers.py @@ -0,0 +1,243 @@ +from collections import defaultdict + +from ...user_functions import concat, extract_keywords_from, italic, small, strong, superscript, term +from .langs import langs +from .tags import tags + + +def render_deveno3(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> 
render_deveno3("deveno3", ["eo", "egy", "Aa"], defaultdict(str, {"sg": "granda"})) + 'la egipta antikva vorto " Aa " → egy (= granda)' + >>> render_deveno3("deveno3", ["eo", "egy", "Aa"], defaultdict(str, {"sg": "-"})) + 'la egipta antikva vorto " Aa " → egy' + >>> render_deveno3("deveno3", ["en", "ang", "bridd"], defaultdict(str)) + 'la anglosaksa vorto " bridd " → ang' + """ + parts.pop(0) # Remove the source lang + lang = parts.pop(0) + phrase = f'la {langs[lang]} vorto " {strong(parts.pop(0))} "' + if lang != "grc": + phrase += f" {superscript('→ ' + lang)}" + if (sg := data["sg"]) not in {"", "-"}: + phrase += f" (= {sg})" + return phrase + + +def render_elpropra(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> render_elpropra("elpropra", ["eo", "P", "Albert Einstein"], defaultdict(str, {"not": "fizikisto"})) + 'la nomo de persono "Albert Einstein" (fizikisto)' + >>> render_elpropra("elpropra", ["eo", "-", "testo"], defaultdict(str, {"wpl": "-"})) + 'la nomo "testo"' + >>> render_elpropra("elpropra", ["eo", "P", "Юрий Цолакович Оганесянz"], defaultdict(str, {"wpl": "-", "ts": "Juri Zolakowitsch Oganessia"})) + 'la nomo de persono "Юрий Цолакович Оганесянz" Juri Zolakowitsch Oganessia' + """ + parts.pop(0) # Remove the source lang + phrase = "la nomo " + + match parts.pop(0): + case "L": + phrase += "de lando/regno " + case "M": + phrase += "de mita estaĵo " + case "P": + phrase += "de persono " + case "R": + phrase += "de rivero " + case "T": + phrase += "de monto " + case "U": + phrase += "de urbo " + case "V": + phrase += "de provinco " + + phrase += f'"{parts.pop(0)}"' + + if ts := data["ts"]: + phrase += f" {italic(ts)}" + + if special := data["not"]: + phrase += f" {small('('+special+')')}" + + return phrase + + +def render_form(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + Source: https://eo.wiktionary.org/w/index.php?title=Modulo:meoformo&oldid=1027456 + Date :
2021-12-19 22:43 + + >>> render_form("form-eo", [], defaultdict(str), word="ekamus") + 'ekami' + >>> render_form("form-eo", [], defaultdict(str), word="hispanan") + 'hispana' + >>> render_form("form-eo", [], defaultdict(str), word="surdaj") + 'surda' + >>> render_form("form-eo", [], defaultdict(str), word="inexistant") + 'inexistant' + """ + for suffix, last_char in [ + ("on", "o"), + ("oj", "o"), + ("ojn", "o"), + ("an", "a"), + ("aj", "a"), + ("ajn", "a"), + ("as", "i"), + ("is", "i"), + ("os", "i"), + ("us", "i"), + ("u", "i"), + ]: + if word.endswith(suffix): + return f"{word.removesuffix(suffix)}{last_char}" + return word + + +def render_g(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> render_g("g", ["nm"], defaultdict(str)) + 'neŭtra, vira' + """ + return { + "m": italic("vira"), + "f": italic("ina"), + "n": italic("neŭtra"), + "u": italic("komuna"), + "mf": italic("vira, ina"), + "nm": italic("neŭtra, vira"), + }.get(parts[0], parts[0]) + + +def render_hebr(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> render_hebr("Hebr", ["בַּיִת כְּנֶסֶת"], defaultdict(str, {"d-heb": "bayiṯ kenæsæt", "b": "domo por renkontiĝo"})) + 'בַּיִת כְּנֶסֶת, CHA bayiṯ kenæsæt, „domo por renkontiĝo“' + >>> render_hebr("Hebr", ["שול"], defaultdict(str, {"d-yid": "shul", "b": "(Religions)"})) + 'שול, YIVO shul, „(Religions)“' + >>> render_hebr("Hebr", ["אסנוגה"], defaultdict(str, {"d-lad": "esnoga", "b": "Sinagogo, preĝejo"})) + 'אסנוגה, esnoga, „Sinagogo, preĝejo“' + """ + phrase = parts.pop(0) + if heb := data["d-heb"]: + phrase += f", CHA {italic(heb)}" + elif yid := data["d-yid"]: + phrase += f", YIVO {italic(yid)}" + elif lad := data["d-lad"]: + phrase += f", {italic(lad)}" + if b := data["b"]: + phrase += f", „{b}“" + return phrase + + +def render_k(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> render_k("k", ["mul", "astrologio"], 
defaultdict(str)) + '(astrologio)' + >>> render_k("k", ["eo", "arkitekturo", "% pri tempo kiel objekto"], defaultdict(str)) + '(arkitekturo; pri tempo kiel objekto)' + >>> render_k("k", ["eo", "S: % iomete-", "arkitekturo"], defaultdict(str)) + '(iomete arkitekturo)' + >>> render_k("k", ["eo", "T: ASKI.", "MATSI.", "F: historio", "arkitekturo", "S: % poezia", "S: vulgara", "C: % nepiva", "C: & nepiva"], defaultdict(str)) + '(askia signo; matematika simbolo; historio; arkitekturo; poezia; vulgara; nepiva; nepiva)' + >>> render_k("k", ["eo", "G: VTR."], defaultdict(str)) + '(transitiva)' + >>> render_k("k", ["eo", "G: VIT.-", '% kun prepozicio "pri"', "% eĉ-", "VTR.", "C: % celante senvivaĵon aŭ abstraktaĵon"], defaultdict(str)) + '(netransitiva kun prepozicio "pri"; eĉ transitiva; celante senvivaĵon aŭ abstraktaĵon)' + """ + raw_themes = [] + current_type = "" + any_is_prefix = False + + for part in parts[1:]: # Skip the lang + if ":" in part: + current_type, part = part.split(":") + + part = part.strip().replace("% ", "").replace("& ", "") + + if is_prefix := part.endswith("-"): + part = part.removesuffix("-") + any_is_prefix = True + + match current_type: + case "": + pass + case "C" | "F" | "S": + part = tags.get(part, part) + case "G" | "T": + part = tags.get(part.removesuffix("."), part) + case _: + assert 0, f"Unhandled `k` type {current_type!r}" + + if is_prefix: + part += "-" + raw_themes.append(part) + + if any_is_prefix: + # Merge items: when an item ends with a dash, then it must be concat with the next item in the list + themes = [] + idx = 0 + while idx < len(raw_themes): + if raw_themes[idx].endswith("-"): + themes.append(f"{raw_themes[idx].removesuffix('-')} {raw_themes[idx + 1]}") + idx += 1 + else: + themes.append(raw_themes[idx]) + idx += 1 + else: + themes = raw_themes + + return term(concat(themes, sep="; ")) + + +def render_t(tpl: str, parts: list[str], data: defaultdict[str, str], word: str = "") -> str: + """ + >>> render_t("t", ["id", 
"roti"], defaultdict(str)) + 'roti → id' + >>> render_t("t", ["id", "roti", "nm"], defaultdict(str)) + 'roti → id neŭtra, vira' + >>> render_t("t", ["grc", "ὄνομα", "n"], defaultdict(str, {"ts": "ónuma", "not": "kvin literoj"})) + 'ὄνομα neŭtra ónuma (kvin literoj)' + >>> render_t("t", ["ja", "脱出する"], defaultdict(str, {"sa": "だっしゅつする", "ts": "dasshutsu suru"})) + '脱出する → ja aŭ だっしゅつする → ja dasshutsu suru' + """ + lang = parts.pop(0) + phrase = parts.pop(0) + if lang != "grc": + phrase += f" {superscript('→ ' + lang)}" + if parts: + phrase += f" {render_g('g', [parts.pop(0)], defaultdict(), word=word)}" + + if other := data["sa"]: + phrase += f" aŭ {other} {superscript('→ ' + lang)}" + if trans := data["ts"]: + phrase += f" {italic(trans)}" + else: + if trans := data["ts"]: + phrase += f" {italic(trans)}" + if note := data["not"]: + phrase += f" ({small(note)})" + + return phrase + + +template_mapping = { + "deveno3": render_deveno3, + "elpropra": render_elpropra, + "form-eo": render_form, + "g": render_g, + "Hebr": render_hebr, + "k": render_k, + "t": render_t, +} + + +def lookup_template(tpl: str) -> bool: + return tpl in template_mapping + + +def render_template(word: str, template: tuple[str, ...]) -> str: + tpl, *parts = template + data = extract_keywords_from(parts) + return template_mapping[tpl](tpl, parts, data, word=word) diff --git a/wikidict/namespaces.py b/wikidict/namespaces.py index 2fc0a42d..a0d8b8d8 100644 --- a/wikidict/namespaces.py +++ b/wikidict/namespaces.py @@ -10,6 +10,7 @@ "de": ["Bild", "Datei", "Image", "Kategorie"], "el": ["Image", "Αρχείο", "Εικόνα", "Κατηγορία"], "en": ["CAT", "Category", "File", "Image"], + "eo": ["Dosiero", "Image", "Kategorio"], "es": ["Archivo", "Categoría", "Image", "Imagen"], "fr": ["Catégorie", "Fichier", "Image"], "it": ["Categoria", "File", "Image", "Immagine"], diff --git a/wikidict/render.py b/wikidict/render.py index e9834d7f..d44693e7 100644 --- a/wikidict/render.py +++ b/wikidict/render.py @@ -176,6 +176,8 
@@ def get_items(patterns: tuple[str, ...], *, skip: tuple[str, ...] | None = None) items = get_items((": ",)) case "en": items = get_items(("",), skip=("===etymology", "{{pie root")) + case "eo": + items = get_items((":",)) case "es": items = get_items((r";\d",), skip=("=== etimología",)) case "fr": @@ -456,6 +458,29 @@ def adjust_wikicode(code: str, locale: str) -> str: # Remove tables (cf issue #2073) code = re.sub(r"^\{\|.*?\|\}", "", code, flags=re.DOTALL | re.MULTILINE) + elif locale == "eo": + # Wipe out {{Deklinacio-eo}} + code = code.replace("{{Deklinacio-eo}}", "") + + # For variants + # {{form-eo}} → # {{form-eo}} + code = code.replace("{{form-eo}}", "# {{form-eo}}") + + # {{xxx}} → ==== {{xxx}} ==== + # {{xx-x}} → ==== {{xx-x}} ==== + code = re.sub(r"^(\{\{[\w\-]+\}\})", r"==== \1 ====", code, flags=re.MULTILINE) + + # ===={{Tradukoj}}==== → =={{Tradukoj}}== + code = re.sub( + r"====\s*(\{\{(?:Ekzemploj|Derivaĵoj|Referencoj|Sinonimoj|Tradukoj|Vortfaradoj|trad-\w+)\}\})\s*====", + r"== \1 ==", + code, + flags=re.MULTILINE, + ) + + # Easier pronunciation + code = re.sub(r"==== {{Vorterseparo}} ====\s*:(.+)\s*", r"\n{{PRON|`\1`}}\n", code, flags=re.MULTILINE) + elif locale == "es": # {{ES|xxx|núm=n}} → == {{lengua|es}} == code = re.sub(r"^\{\{ES\|.+\}\}", r"== {{lengua|es}} ==", code, flags=re.MULTILINE) @@ -551,7 +576,10 @@ def parse_word(word: str, code: str, locale: str, force: bool = False) -> Word: definitions = find_definitions(word, parsed_sections, locale) elif locale in {"no", "pt"}: # Some words have no head sections but only a list of definitions at the root of the "top" section - marker = "===" if locale == "no" else "==" + marker = { + "no": "===", + "pt": "==", + }[locale] for top in top_sections: contents = top.contents top.contents = contents[: contents.find(marker)]