diff --git a/.github/workflows/deepwell.yaml b/.github/workflows/deepwell.yaml index 3eb24d61fb..7c9634c2cf 100644 --- a/.github/workflows/deepwell.yaml +++ b/.github/workflows/deepwell.yaml @@ -129,4 +129,10 @@ jobs: run: cd deepwell && cargo fmt --all -- --check - name: Clippy - run: cd deepwell && cargo clippy --no-deps + run: cd deepwell && cargo clippy --no-deps -- -A unused_imports + + # clippy is over aggressive with "unused import" warnings, reporting it for + # prelude modules and common export patterns, which is noisy and unhelpful. + # + # Since regular (i.e. actual) unused imports will fail the normal build, we + # can just suppress all unused import warnings in Clippy. diff --git a/deepwell/.clippy.toml b/deepwell/.clippy.toml new file mode 100644 index 0000000000..9c58226fb6 --- /dev/null +++ b/deepwell/.clippy.toml @@ -0,0 +1,2 @@ +# Code smells +disallowed-names = ["foo", "bar", "baz", "todo"] diff --git a/deepwell/Cargo.lock b/deepwell/Cargo.lock index 3339cf91c4..8609fbe5e1 100644 --- a/deepwell/Cargo.lock +++ b/deepwell/Cargo.lock @@ -1060,6 +1060,7 @@ dependencies = [ "femme", "filemagic", "fluent", + "fluent-syntax", "ftml", "futures", "hex", diff --git a/deepwell/Cargo.toml b/deepwell/Cargo.toml index 175f7501b4..0a9afc1180 100644 --- a/deepwell/Cargo.toml +++ b/deepwell/Cargo.toml @@ -31,6 +31,7 @@ either = "1" femme = "2" filemagic = "0.12" fluent = "0.16" +fluent-syntax = "0" ftml = { version = "1.22", features = ["mathml"] } futures = { version = "0.3", features = ["async-await"], default-features = false } hex = { version = "0.4", features = ["serde"] } diff --git a/deepwell/src/endpoints/locale.rs b/deepwell/src/endpoints/locale.rs index aefc783bbb..4bc8a55919 100644 --- a/deepwell/src/endpoints/locale.rs +++ b/deepwell/src/endpoints/locale.rs @@ -32,9 +32,9 @@ pub struct LocaleOutput { } #[derive(Deserialize, Debug, Clone)] -pub struct TranslateInput<'a> { - locale: &'a str, - messages: HashMap>, +pub struct TranslateInput { + locales: 
Vec, + messages: HashMap>, } type TranslateOutput = HashMap; @@ -58,18 +58,29 @@ pub async fn translate_strings( ctx: &ServiceContext<'_>, params: Params<'static>, ) -> Result { - let TranslateInput { - locale: locale_str, - messages, - } = params.parse()?; + let TranslateInput { locales, messages } = params.parse()?; + + if locales.is_empty() { + error!("No locales specified in translate call"); + return Err(ServiceError::NoLocalesSpecified); + } info!( - "Translating {} message keys in locale {locale_str}", + "Translating {} message keys in locale {} (or {} fallbacks)", messages.len(), + &locales[0], + locales.len() - 1, ); - let locale = LanguageIdentifier::from_bytes(locale_str.as_bytes())?; let mut output: TranslateOutput = HashMap::new(); + let locales = { + let mut langids = Vec::new(); + for locale in locales { + let langid = LanguageIdentifier::from_bytes(locale.as_bytes())?; + langids.push(langid); + } + langids + }; for (message_key, arguments_raw) in messages { info!( @@ -80,7 +91,7 @@ pub async fn translate_strings( let arguments = arguments_raw.into_fluent_args(); let translation = ctx.localization() - .translate(&locale, &message_key, &arguments)?; + .translate(&locales, &message_key, &arguments)?; output.insert(message_key, translation.to_string()); } diff --git a/deepwell/src/locales/fallback.rs b/deepwell/src/locales/fallback.rs new file mode 100644 index 0000000000..bae744dcd7 --- /dev/null +++ b/deepwell/src/locales/fallback.rs @@ -0,0 +1,145 @@ +/* + * locales/fallback.rs + * + * DEEPWELL - Wikijump API provider and database manager + * Copyright (C) 2019-2023 Wikijump Team + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +//! Module to implement locale fallbacks. +//! +//! This is different than having a list of locales and simply trying each one. +//! Beyond that, this is another important component to finding a proper locale. +//! +//! Given some locale, it iterates through increasingly generic forms of it +//! until a match can be found (or not). +//! +//! The order followed is: +//! * Language, script, region, and variant (unmodified) +//! * Language, script, and region +//! * Language and script +//! * Language, region, and variant +//! * Language and region +//! * Language only +//! +//! The logic here will skip a locale variant if it's already been outputted. +//! So for a locale like `ko`, it will only emit one item, `ko`. For something like `en-CA`, +//! it will emit `en-CA` then `en`. +//! +//! If the callback returns `Some(_)`, iteration stops and that locale and value are returned. + +use unic_langid::LanguageIdentifier; + +pub fn iterate_locale_fallbacks( + mut locale: LanguageIdentifier, + mut f: F, +) -> Option<(LanguageIdentifier, T)> +where + F: FnMut(&LanguageIdentifier) -> Option, +{ + debug!("Iterating through locale fallbacks for {locale}"); + + macro_rules! try_iter { + () => { + if let Some(result) = f(&locale) { + return Some((locale, result)); + } + }; + } + + // Storage of temporarily removed fields. 
+ let variants: Vec<_> = locale.variants().cloned().collect(); + + // Unmodified locale + // Language, script, region, variant + try_iter!(); + + if !variants.is_empty() { + // Remove variant + // Language, script, region + locale.clear_variants(); + try_iter!(); + } + + // Remove region + // Language, script + let region = locale.region.take(); + if region.is_some() { + try_iter!(); + } + + if locale.script.take().is_some() { + // Re-add region and variant, remove script + // Language, region, variant + locale.region = region; + locale.set_variants(&variants); + try_iter!(); + + if !variants.is_empty() { + // Remove variant + // Language, region + locale.clear_variants(); + try_iter!(); + } + + if locale.region.is_some() { + // Remove region + // Language only + locale.region = None; + try_iter!(); + } + } + + // No results + None +} + +#[test] +fn fallbacks() { + fn check(locale: &str, expected: &[&str]) { + let locale = locale.parse().expect("Unable to parse locale"); + let mut actual = Vec::new(); + + iterate_locale_fallbacks::<_, ()>(locale, |locale| { + actual.push(str!(locale)); + None + }); + + assert!( + actual.iter().eq(expected), + "Actual fallback locale list doesn't match expected\nactual: {:?}\nexpected: {:?}", + actual, + expected, + ); + } + + check("en", &["en"]); + check("fr-be", &["fr-BE", "fr"]); + check("es-Latn", &["es-Latn", "es"]); + check("en-Latn-US", &["en-Latn-US", "en-Latn", "en-US", "en"]); + check("en-Valencia", &["en-valencia", "en"]); + check("en_CA_valencia", &["en-CA-valencia", "en-CA", "en"]); + check( + "en_Latn-CA_valencia", + &[ + "en-Latn-CA-valencia", + "en-Latn-CA", + "en-Latn", + "en-CA-valencia", + "en-CA", + "en", + ], + ); +} diff --git a/deepwell/src/locales/fluent.rs b/deepwell/src/locales/fluent.rs index 541ed1ecdc..6534084288 100644 --- a/deepwell/src/locales/fluent.rs +++ b/deepwell/src/locales/fluent.rs @@ -19,15 +19,17 @@ */ use super::error::{fluent_load_err, LocalizationLoadError}; +use 
super::fallback::iterate_locale_fallbacks; use crate::services::Error as ServiceError; use async_std::fs; use async_std::path::{Path, PathBuf}; use async_std::prelude::*; use fluent::{bundle, FluentArgs, FluentMessage, FluentResource}; +use fluent_syntax::ast::Pattern; use intl_memoizer::concurrent::IntlLangMemoizer; use std::borrow::Cow; use std::collections::HashMap; -use std::fmt::{self, Debug}; +use std::fmt::{self, Debug, Display}; use unic_langid::LanguageIdentifier; pub type FluentBundle = bundle::FluentBundle; @@ -112,38 +114,34 @@ impl Localizations { } } - fn get_message( - &self, + /// Retrieve the specified Fluent bundle and message. + fn get_message<'a>( + &'a self, locale: &LanguageIdentifier, - key: &str, - ) -> Result<(&FluentBundle, FluentMessage), ServiceError> { + path: &str, + ) -> Result<(&'a FluentBundle, FluentMessage), ServiceError> { match self.bundles.get(locale) { None => Err(ServiceError::LocaleMissing), - Some(bundle) => match bundle.get_message(key) { + Some(bundle) => match bundle.get_message(path) { Some(message) => Ok((bundle, message)), None => Err(ServiceError::LocaleMessageMissing), }, } } - pub fn translate<'a>( + /// Retrieve the specified Fluent pattern from the associated bundle. 
+ fn get_pattern<'a>( &'a self, locale: &LanguageIdentifier, - key: &str, - args: &'a FluentArgs<'a>, - ) -> Result, ServiceError> { - // Get appropriate message and bundle - let (path, attribute) = Self::parse_selector(key); - let (bundle, message) = self.get_message(locale, path)?; + path: &str, + attribute: Option<&str>, + ) -> Result<(&'a FluentBundle, &'a Pattern<&'a str>), ServiceError> { + debug!("Checking for translation patterns in locale {locale}"); - info!( - "Translating for locale {}, message path {}, attribute {}", - locale, - path, - attribute.unwrap_or(""), - ); + // Get appropriate message and bundle, if found + let (bundle, message) = self.get_message(locale, path)?; - // Get pattern from message + // Get pattern from message, if present let pattern = match attribute { Some(attribute) => match message.get_attribute(attribute) { Some(attrib) => attrib.value(), @@ -155,13 +153,85 @@ impl Localizations { }, }; + Ok((bundle, pattern)) + } + + /// Iterate through a list of locales, and try to find the first existing pattern. + fn get_pattern_locales<'a, L, I>( + &'a self, + locales: I, + path: &str, + attribute: Option<&str>, + ) -> Result<(LanguageIdentifier, &'a FluentBundle, &'a Pattern<&'a str>), ServiceError> + where + L: AsRef + 'a, + I: IntoIterator, + { + let mut last_error = ServiceError::NoLocalesSpecified; // Occurs if locales is empty + + // Iterate through each locale to try + for locale_ref in locales { + // Iterate through each fallback locale (e.g. 
['fr-BE'] -> ['fr-BE', 'fr']) + let locale = locale_ref.as_ref(); + let result = iterate_locale_fallbacks(locale.clone(), |locale| { + // Try and get bundle and pattern, if it exists + match self.get_pattern(locale, path, attribute) { + Err(error) => { + debug!("Pattern not found for locale {locale}: {error}"); + last_error = error; + None + } + Ok((bundle, pattern)) => { + info!("Found pattern for locale {locale}"); + Some((bundle, pattern)) + } + } + }); + + if let Some((locale, (bundle, pattern))) = result { + return Ok((locale, bundle, pattern)); + } + } + + warn!("Could not find any translation patterns: {last_error}"); + Err(last_error) + } + + /// Translates the message, given the message key and formatting arguments. + /// + /// At least one locale must be specified. If no translation can be found for + /// the given locale, then progressively more generic forms are attempted. If + /// no translations can be found even for all fallback locales, an error is + /// returned. + pub fn translate<'a, L, I>( + &'a self, + locales: I, + key: &str, + args: &'a FluentArgs<'a>, + ) -> Result, ServiceError> + where + L: AsRef + Display + 'a, + I: IntoIterator, + { + // Parse translation key + let (path, attribute) = Self::parse_selector(key); + info!( + "Checking message path {}, attribute {} for a matching locale", + path, + attribute.unwrap_or(""), + ); + + // Find pattern for translating + let (locale, bundle, pattern) = + self.get_pattern_locales(locales, path, attribute)?; + // Format using pattern let mut errors = vec![]; let output = bundle.format_pattern(pattern, Some(args), &mut errors); // Log any errors if !errors.is_empty() { - warn!("Errors formatting message for locale {locale}, message key {key}",); + warn!("Errors formatting message for locale {locale}, message key {key}"); for (key, value) in args.iter() { warn!("Passed formatting argument: {key} -> {value:?}"); @@ -172,6 +242,10 @@ impl Localizations { } } + // We could return the locale used if we 
wished, but presently we discard this information. + // Change the return type of this method and its users if you need this information. + let _ = locale; + // Done Ok(output) } diff --git a/deepwell/src/locales/mod.rs b/deepwell/src/locales/mod.rs index 98473713d9..7968837f10 100644 --- a/deepwell/src/locales/mod.rs +++ b/deepwell/src/locales/mod.rs @@ -20,8 +20,10 @@ mod arguments; mod error; +mod fallback; mod fluent; pub use self::arguments::{MessageArguments, MessageValue}; pub use self::error::*; +pub use self::fallback::iterate_locale_fallbacks; pub use self::fluent::Localizations; diff --git a/deepwell/src/services/error.rs b/deepwell/src/services/error.rs index 9a5e36f265..1a3f473f46 100644 --- a/deepwell/src/services/error.rs +++ b/deepwell/src/services/error.rs @@ -62,6 +62,9 @@ pub enum Error { #[error("Message key was found, but does not have this attribute")] LocaleMessageAttributeMissing, + #[error("No locales were specified in the request")] + NoLocalesSpecified, + #[error("Magic library error: {0}")] Magic(#[from] FileMagicError), @@ -368,6 +371,7 @@ impl Error { Error::LocaleMessageMissing => 4102, Error::LocaleMessageValueMissing => 4103, Error::LocaleMessageAttributeMissing => 4104, + Error::NoLocalesSpecified => 4105, // 4200 -- Login errors Error::EmptyPassword => 4200, diff --git a/deepwell/src/services/special_page/service.rs b/deepwell/src/services/special_page/service.rs index 21dbdba1de..fe1c66391f 100644 --- a/deepwell/src/services/special_page/service.rs +++ b/deepwell/src/services/special_page/service.rs @@ -38,7 +38,7 @@ impl SpecialPageService { ctx: &ServiceContext<'_>, site: &SiteModel, sp_page_type: SpecialPageType, - locale: &LanguageIdentifier, + locales: &[LanguageIdentifier], page_info: PageInfo<'_>, ) -> Result { info!( @@ -55,6 +55,8 @@ impl SpecialPageService { // exists is the one that's used. let config = ctx.config(); let (slugs, translate_key) = match sp_page_type { + // TODO: Figure out exact template ordering (e.g. 
_template vs cat:_template) + // See https://scuttle.atlassian.net/browse/WJ-1201 SpecialPageType::Template => (vec![cow!(config.special_page_template)], ""), SpecialPageType::Missing => { let slugs = Self::slugs_with_category( @@ -78,7 +80,7 @@ impl SpecialPageService { &slugs, translate_key, site.site_id, - locale, + locales, &page_info, ) .await?; @@ -124,7 +126,7 @@ impl SpecialPageService { slugs: &[Cow<'_, str>], translate_key: &str, site_id: i64, - locale: &LanguageIdentifier, + locales: &[LanguageIdentifier], page_info: &PageInfo<'_>, ) -> Result { debug!("Getting wikitext for special page, {} slugs", slugs.len()); @@ -160,7 +162,10 @@ impl SpecialPageService { args.set("category", fluent_str!(category)); args.set("domain", fluent_str!(ctx.config().main_domain_no_dot)); - let wikitext = ctx.localization().translate(locale, translate_key, &args)?; + let wikitext = ctx + .localization() + .translate(locales, translate_key, &args)?; + Ok(wikitext.into_owned()) } } diff --git a/deepwell/src/services/view/service.rs b/deepwell/src/services/view/service.rs index e7b1d2301c..d9c82e7743 100644 --- a/deepwell/src/services/view/service.rs +++ b/deepwell/src/services/view/service.rs @@ -45,6 +45,7 @@ use fluent::{FluentArgs, FluentValue}; use ftml::prelude::*; use ftml::render::html::HtmlOutput; use ref_map::*; +use std::borrow::Cow; use unic_langid::LanguageIdentifier; use wikidot_normalize::normalize; @@ -56,17 +57,18 @@ impl ViewService { ctx: &ServiceContext<'_>, GetPageView { domain, - locale: locale_str, + locales: locales_str, route, session_token, }: GetPageView, ) -> Result { info!( - "Getting page view data for domain '{}', route '{:?}', locale '{}'", - domain, route, locale_str, + "Getting page view data for domain '{}', route '{:?}', locales '{:?}'", + domain, route, locales_str, ); - let locale = LanguageIdentifier::from_bytes(locale_str.as_bytes())?; + // Parse all locales + let locales = parse_locales(&locales_str)?; // Attempt to get a viewer helper 
structure, but if the site doesn't exist // then return right away with the "no such site" response. @@ -76,7 +78,7 @@ impl ViewService { user_session, } = match Self::get_viewer( ctx, - &locale, + &locales, &domain, session_token.ref_map(|s| s.as_str()), ) @@ -107,7 +109,19 @@ impl ViewService { alt_title: None, score: ScoreValue::Integer(0), // TODO configurable default score value tags: vec![], - language: cow!(locale_str), + + // TODO Determine what locale should be passed here. + // There are ways we can determine which locale + // was used for a particular message, but there + // are several messages in play here, each of + // which may technically be a slightly different + // locale (in case of fallbacks etc). + // + // For now, just use the declared first locale + // passed in by the requester, since that's + // presumably what they'd *like* the message + // to be in, if translations are available. + language: Cow::Owned(str!(&locales[0])), }; // Helper structure to designate which variant of GetPageViewOutput to return. @@ -184,7 +198,7 @@ impl ViewService { wikitext, render_output, } = SpecialPageService::get( - ctx, &site, page_type, &locale, page_info, + ctx, &site, page_type, &locales, page_info, ) .await?; @@ -209,7 +223,7 @@ impl ViewService { ctx, &site, SpecialPageType::Missing, - &locale, + &locales, page_info, ) .await?; @@ -286,7 +300,7 @@ impl ViewService { /// operations, such as slug normalization or redirect site aliases. 
pub async fn get_viewer( ctx: &ServiceContext<'_>, - locale: &LanguageIdentifier, + locales: &[LanguageIdentifier], domain: &str, session_token: Option<&str>, ) -> Result { @@ -300,14 +314,15 @@ impl ViewService { (site, redirect_site) } SiteDomainResult::Slug(slug) => { - let html = Self::missing_site_output(ctx, locale, domain, Some(slug)) - .await?; + let html = + Self::missing_site_output(ctx, locales, domain, Some(slug)) + .await?; return Ok(ViewerResult::MissingSite(html)); } SiteDomainResult::CustomDomain(domain) => { let html = - Self::missing_site_output(ctx, locale, domain, None).await?; + Self::missing_site_output(ctx, locales, domain, None).await?; return Ok(ViewerResult::MissingSite(html)); } @@ -339,7 +354,7 @@ impl ViewService { /// Produce output for cases where a site does not exist. async fn missing_site_output( ctx: &ServiceContext<'_>, - locale: &LanguageIdentifier, + locales: &[LanguageIdentifier], domain: &str, site_slug: Option<&str>, ) -> Result { @@ -351,9 +366,11 @@ impl ViewService { args.set("slug", fluent_str!(site_slug)); args.set("domain", fluent_str!(config.main_domain_no_dot)); - let html = - ctx.localization() - .translate(locale, "wiki-page-site-slug", &args)?; + let html = ctx.localization().translate( + locales, + "wiki-page-site-slug", + &args, + )?; Ok(html.to_string()) } @@ -365,7 +382,7 @@ impl ViewService { args.set("domain", fluent_str!(config.main_domain_no_dot)); let html = ctx.localization().translate( - locale, + locales, "wiki-page-site-custom", &args, )?; @@ -417,3 +434,21 @@ impl ViewService { } } } + +/// Converts an array of strings to a list of locales. +/// +/// # Errors +/// If the input array is empty, or if any entry is not a valid language identifier. 
+fn parse_locales>(locales_str: &[S]) -> Result> { + if locales_str.is_empty() { + warn!("List of locales is empty"); + return Err(Error::NoLocalesSpecified); + } + + let mut locales = Vec::with_capacity(locales_str.len()); + for locale_str in locales_str { + let locale = LanguageIdentifier::from_bytes(locale_str.as_ref().as_bytes())?; + locales.push(locale); + } + Ok(locales) +} diff --git a/deepwell/src/services/view/structs.rs b/deepwell/src/services/view/structs.rs index 203321a673..aa8f127f06 100644 --- a/deepwell/src/services/view/structs.rs +++ b/deepwell/src/services/view/structs.rs @@ -41,7 +41,7 @@ pub struct GetPageView { pub domain: String, pub session_token: Option, pub route: Option, - pub locale: String, + pub locales: Vec, } #[derive(Deserialize, Debug, Clone)]