Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Accept and resolve BCP47 language names (WIP) #1641

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions core/font.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
--- font
-- @module SILE.font
local icu = require("justenoughicu")

local lastshaper

Expand Down Expand Up @@ -41,13 +40,6 @@ SILE.registerCommand("font", function (options, content)
SILE.settings:set("font.direction", options.direction)
end
if options.language then
if options.language ~= "und" and icu and icu.canonicalize_language then
local newlang = icu.canonicalize_language(options.language)
-- if newlang ~= options.language then
-- SU.warn("Language '"..options.language.."' not canonical, '"..newlang.."' will be used instead")
-- end
options.language = newlang
end
SILE.languageSupport.loadLanguage(options.language)
SILE.settings:set("document.language", options.language)
end
Expand Down
109 changes: 83 additions & 26 deletions core/languages.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
-- @interfaces languages

local loadkit = require("loadkit")
local cldr = require("cldr")

-- Disabled for now, see further below.
-- local cldr = require("cldr")

loadkit.register("ftl", function (file)
local contents = assert(file:read("*a"))
Expand All @@ -12,40 +14,95 @@ end)

SILE.scratch.loaded_languages = {}

local icu = require("justenoughicu")

-- Find the "closest" language tag accepted by a callback.
-- (This small utility could be moved to utilities as SU.forLanguage()...)
--
-- The input is expected to be a valid canonical BCP47 language tag. The callback
-- is tried on the full tag first; whenever it returns a falsy value, the last
-- dash-separated subtag is dropped and the callback is tried again.
-- E.g. "xx-Xxxx-XX" is tried as "xx-Xxxx-XX", then "xx-Xxxx", then "xx", stopping
-- at the first candidate the callback accepts.
--
-- Parameters:
--   langbcp47  language tag to start from (nil/false yields nil immediately,
--              without invoking the callback)
--   callback   function called with a candidate tag; a truthy return value
--              means "accepted"
-- Returns:
--   res, language  the callback's truthy result followed by the tag that
--                  produced it, or
--   nil            if no candidate was accepted by the callback
local function forLanguage(langbcp47, callback)
  -- Nothing (left) to try: every candidate was rejected.
  if not langbcp47 then
    return nil
  end
  local outcome = callback(langbcp47)
  if outcome then
    return outcome, langbcp47
  end
  -- Greedy (.+) splits at the LAST dash, so exactly one trailing subtag is
  -- removed; a tag without any dash yields nil and ends the search.
  -- Proper tail call, so the stack does not grow with the number of subtags.
  return forLanguage(langbcp47:match("^(.+)-.*$"), callback)
end

SILE.languageSupport = {
languages = {},
loadLanguage = function (language)
language = language or SILE.settings:get("document.language")
language = cldr.locales[language] and language or "und"
-- The user may have set document.language to anything, let's ensure a canonical BCP47 language...
if language ~= "und" then
language = icu.canonicalize_language(language)
-- language = cldr.locales[language] and language or "und"
end
if SILE.scratch.loaded_languages[language] then
return
end
SILE.scratch.loaded_languages[language] = true
local langresource = string.format("languages.%s", language)
local gotlang, lang = pcall(require, langresource)
if not gotlang then
SU.warn(
("Unable to load language feature support (e.g. hyphenation rules) for %s: %s"):format(
language,
lang:gsub(":.*", "")
)
)
-- We need to find language resources for this BCP47 identifier, from the most specific
-- to the most general.
local langresource, matchedlang = forLanguage(language, function (lang)
local resource = string.format("languages.%s", lang)
local gotres, res = pcall(require, resource)
return gotres and res
end)
if not langresource then
SU.warn(("Unable to load language feature support (e.g. hyphenation rules) for %s")
:format(language))
else
print(("Loaded language feature support for %s: matched %s") -- HACK We'll need a mere SU.debug when OK...
:format(language, matchedlang))
if language ~= matchedlang then
-- Now that's so UGLY. Say the input language was "en-GB".
-- It eventually matched "en" (as we don't yet have a "languages.en-GB" resource).
-- PROBLEM: Our languages.xxx files (almost) all work by side effects, putting various things,
-- in the case of our example, in SILE.nodeMakers.en, SILE.hyphenator.languages.en
-- and SU.formatNumber.en... While we now expect the language to be "en-GB"...
-- It's a HACK, but copy the stuff into our language.
SILE.nodeMakers[language] = SILE.nodeMakers[matchedlang]
SU.formatNumber[language] = SU.formatNumber[matchedlang]
SILE.hyphenator.languages[language] = SILE.hyphenator.languages[matchedlang]
end
end
local ftlresource = string.format("languages.%s.messages", language)
SU.debug("fluent", "Loading FTL resource", ftlresource, "into locale", language)
-- This needs to be set so that we load localizations into the right bundle,
-- but this breaks the sync enabled by the hook in the document.language
-- setting, so we want to set it back when we're done.
local original_language = fluent:get_locale()
fluent:set_locale(language)
local gotftl, ftl = pcall(require, ftlresource)
if not gotftl then
SU.warn(
("Unable to load localized strings (e.g. table of contents header text) for %s: %s"):format(
language,
ftl:gsub(":.*", "")
)
)
-- We need to find fluent resources for this BCP47 identifier, from the most specific
-- to the most general.
local ftlresource, matchedi18n = forLanguage(language, function (lang)
local resource = string.format("i18n.%s", lang)
SU.debug("fluent", "Loading FTL resource", resource, "into locale", lang)
fluent:set_locale(lang)
local gotftl, ftl = pcall(require, resource)
return gotftl and ftl
end)
if not ftlresource then
SU.warn(("Unable to load localized strings (e.g. table of contents header text) for %s")
:format(language))
else
print(("Load localized strings for %s: matched %s") -- HACK We'll need a mere SU.debug when OK...
:format(language, matchedi18n))
if language ~= matchedi18n then
-- Now that's even more UGLY. Say the input language was "en-GB".
-- It eventually matched "en" (as we don't yet have an "i18n.en-GB" resource).
-- PROBLEM: the fluent locale must be set to the target language before loading
-- a ftl file. APIs that aren't stateless are messy :(
-- In the case of our example, the strings had to be read into "en"...
-- HACK HACK, all we can do is reload it fully, but under the target "en-GB" name...
local loaded = string.format("language.%s.messages", matchedi18n)
package.loaded[loaded] = nil -- HACK force reload!!!
fluent:set_locale(language)
require(string.format("i18n.%s", matchedi18n))
end
end
if type(lang) == "table" and lang.init then
lang.init()
Expand Down
Loading