Skip to content

Commit

Permalink
Optimize CatalogItemsComparator for common case
Browse files Browse the repository at this point in the history
Rewrite the comparison function to be significantly faster [*] in the common case when no
accelerators are used in the string. This is done by simply avoiding another copy.

[*] Approximately 2x on macOS where wchar_t* is UTF-32 and needs to be converted to UTF-16
for ICU collator, and ~5x on Windows where UTF-16 is used natively.

Pre-convert strings to UTF-16 in CatalogItemsComparator

return referecne from Catalog::GetTranslation

pre-conversion: use on windows too (wip)

zzz
  • Loading branch information
vslavik committed Apr 4, 2024
1 parent 24eb482 commit eff057d
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 18 deletions.
32 changes: 24 additions & 8 deletions src/cat_sorting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,29 @@ CatalogItemsComparator::CatalogItemsComparator(const Catalog& catalog, const Sor
m_collator.reset(new unicode::Collator(catalog.GetSourceLanguage(), unicode::Collator::case_insensitive));
break;
}

// Prepare cache for faster comparison. ICU uses UTF-16 internally, we can singificantly
// speed up comparisons by doing string conversion in advance, in O(n) time and space, if
// the platform's native representation is UTF-32 or -8 (which it is on non-Windows).
// Moreover, the additional processing of removing accelerators is also done only once
// on all platforms, resulting in massive speedups on Windows too.
switch (m_order.by)
{
case SortOrder::By_Source:
m_sortKeys.reserve(m_catalog.GetCount());
for (auto& i: m_catalog.items())
m_sortKeys.push_back(ConvertToSortKey(i->GetString()));
break;

case SortOrder::By_Translation:
m_sortKeys.reserve(m_catalog.GetCount());
for (auto& i: m_catalog.items())
m_sortKeys.push_back(ConvertToSortKey(i->GetTranslation()));
break;

case SortOrder::By_FileOrder:
break;
}
}


Expand Down Expand Up @@ -156,16 +179,9 @@ bool CatalogItemsComparator::operator()(int i, int j) const
}

case SortOrder::By_Source:
{
auto r = CompareTranslationStrings(a.GetString(), b.GetString());
if ( r != 0 )
return r < 0;
break;
}

case SortOrder::By_Translation:
{
auto r = CompareTranslationStrings(a.GetTranslation(), b.GetTranslation());
auto r = m_collator->compare(m_sortKeys[i], m_sortKeys[j]);
if ( r != 0 )
return r < 0;
break;
Expand Down
31 changes: 23 additions & 8 deletions src/cat_sorting.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct SortOrder
bool errorsFirst;
};


/**
Comparator for sorting catalog items by different criteria.
*/
Expand All @@ -81,21 +82,35 @@ class CatalogItemsComparator
protected:
const CatalogItem& Item(int i) const { return *m_catalog[i]; }

unicode::Collator::result_type CompareTranslationStrings(wxString a, wxString b) const
// Pre-process given string and return it in a form efficient for comparing
// with ICU collator. This does two things:
// 1. Converts to UTF-16 (matters on non-Windows platforms where wchar_t is UTF-32)
// 2. Perform substitution of accelerator characters in the string
static str::UCharBuffer ConvertToSortKey(const wxString& a)
{
a.Replace("&", "");
a.Replace("_", "");

b.Replace("&", "");
b.Replace("_", "");

return m_collator->compare(a, b);
if (a.find_first_of(L"&_") == wxString::npos)
{
return str::to_icu(a);
}
else
{
wxString a_(a);
a_.Replace("&", "");
a_.Replace("_", "");

auto buf = str::to_icu(a_);
// on Windows to_icu() returns shallow view of the string, we need to make
// a deep copy because a_ is a local temporary variable:
buf.ensure_owned();
return buf;
}
}

private:
const Catalog& m_catalog;
SortOrder m_order;
std::unique_ptr<unicode::Collator> m_collator;
std::vector<str::UCharBuffer> m_sortKeys;
};


Expand Down
3 changes: 2 additions & 1 deletion src/catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ class CatalogItem
unsigned GetPluralFormsCount() const;

/// Returns the nth-translation.
wxString GetTranslation(unsigned n = 0) const;
wxString GetTranslation(unsigned n) const;
const wxString& GetTranslation() const { return m_translations[0]; }

/// Returns all translations.
const wxArrayString& GetTranslations() const { return m_translations; }
Expand Down
15 changes: 14 additions & 1 deletion src/str_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,22 @@ class UCharBuffer
operator const UChar*() const { return m_data; }
UChar* data() { return const_cast<UChar*>(m_data); }

// available buffer size, only for owned versions, returns 0 for read-only non-owned
/// Available buffer size, only for owned versions, returns 0 for read-only non-owned
int32_t capacity() { return m_capacity; }

/// Ensure the buffer has a deep copy of the data, if it's not already owned
void ensure_owned()
{
if (m_owned)
return;
if (m_capacity == -1)
m_capacity = u_strlen(m_data) + 1;
auto copy = new UChar[m_capacity];
memcpy(copy, m_data, m_capacity * sizeof(UChar));
m_data = copy;
m_owned = true;
}

private:
UCharBuffer(bool owned, const UChar *data, int32_t capacity) : m_owned(owned), m_data(data), m_capacity(capacity) {}

Expand Down
5 changes: 5 additions & 0 deletions src/unicode_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ class Collator
return ucol_strcoll(m_coll, str::to_icu(a), -1, str::to_icu(b), -1);
}

result_type compare(const UChar *a, const UChar *b) const
{
return ucol_strcoll(m_coll, a, -1, b, -1);
}

template<typename T>
bool operator()(const T& a, const T& b) const
{
Expand Down

0 comments on commit eff057d

Please sign in to comment.