Skip to content

Commit

Permalink
Improve QA checks for Chinese/Japanese
Browse files Browse the repository at this point in the history
Several improvements to reduce false positives:

- Add the fullwidth comma to the character mappings, in addition to the
  enumeration comma, which was already present.

- Don't check case: it doesn't make sense for these languages and sometimes
  triggers false positives when a Roman word is included.

- Don't check for trailing space consistency, because a space isn't used,
  e.g., after sentence-ending punctuation.

- Account for lack of trailing space when checking punctuation too.

See https://en.wikipedia.org/wiki/Chinese_punctuation

See #809
  • Loading branch information
vslavik committed Oct 16, 2023
1 parent b0ebc8d commit fc95391
Showing 1 changed file with 28 additions and 6 deletions.
34 changes: 28 additions & 6 deletions src/qa_checks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,12 @@ class CaseMismatch : public QACheck

CaseMismatch(Language lang) : m_lang(lang.Lang())
{
m_shouldCheck = (m_lang != "zh" && m_lang != "ja");
}

bool CheckString(CatalogItemPtr item, const wxString& source, const wxString& translation) override
{
if (source.length() < 2)
if (!m_shouldCheck || source.length() < 2)
return false;

// Detect that the source string is a sentence: should have 1st letter uppercase and 2nd lowercase,
Expand All @@ -229,6 +230,7 @@ class CaseMismatch : public QACheck
}

private:
bool m_shouldCheck;
std::string m_lang;
};

Expand All @@ -238,11 +240,17 @@ class WhitespaceMismatch : public QACheck
public:
QA_METADATA("whitespace", _("Inconsistent whitespace"))

WhitespaceMismatch(Language /*lang*/) {}
WhitespaceMismatch(Language lang)
{
auto l = lang.Lang();
// Space is used sparingly in these languages and e.g. not present after sentence-ending
// period. Checking trailing/leading space is therefore often a false positive.
m_checkSpaceInTranslation = (l != "zh" && l != "ja");
}

bool CheckString(CatalogItemPtr item, const wxString& source, const wxString& translation) override
{
if (u_isspace(source[0]) && !u_isspace(translation[0]))
if (m_checkSpaceInTranslation && u_isspace(source[0]) && !u_isspace(translation[0]))
{
item->SetIssue(CatalogItem::Issue::Warning, _(L"The translation doesn’t start with a space."));
return true;
Expand All @@ -266,7 +274,7 @@ class WhitespaceMismatch : public QACheck
return true;
}

if (u_isspace(source.Last()) && !u_isspace(translation.Last()))
if (m_checkSpaceInTranslation && u_isspace(source.Last()) && !u_isspace(translation.Last()))
{
item->SetIssue(CatalogItem::Issue::Warning, _(L"The translation is missing a space at the end."));
return true;
Expand All @@ -280,6 +288,9 @@ class WhitespaceMismatch : public QACheck

return false;
}

private:
bool m_checkSpaceInTranslation;
};


Expand All @@ -292,7 +303,7 @@ class PunctuationMismatch : public QACheck
{
}

bool CheckString(CatalogItemPtr item, const wxString& source, const wxString& translation) override
bool CheckString(CatalogItemPtr item, const wxString& source_, const wxString& translation) override
{
if (m_lang == "th" || m_lang == "lo" || m_lang == "km" || m_lang == "my")
{
Expand All @@ -305,6 +316,15 @@ class PunctuationMismatch : public QACheck
return false;
}

auto source(source_);
if (m_lang == "zh" || m_lang == "ja")
{
// Space is used sparingly in these languages and e.g. not present after sentence-ending
// period, so strip it from the source if present and check punctuation w/o it.
if (u_isspace(source.Last()) && !u_isspace(translation.Last()))
source.Trim(/*fromRight:*/true);
}

const UChar32 s_last = source.Last();
const UChar32 t_last = translation.Last();
const bool s_punct = IsPunctuation(s_last);
Expand Down Expand Up @@ -407,13 +427,15 @@ class PunctuationMismatch : public QACheck
case '.':
return trans == L'。';
case ',':
return trans == L'、';
return trans == L'、' || trans == L',';
case '!':
return trans == L'!';
case '?':
return trans == L'?';
case ':':
return trans == L':';
case ';':
return trans == L';';
case '(':
return trans == L'(';
case ')':
Expand Down

0 comments on commit fc95391

Please sign in to comment.