From 4ab808ced9c79851ccdd964afe75468fa0958ca9 Mon Sep 17 00:00:00 2001
From: Marcus Holland-Moritz
Date: Mon, 1 Jan 2024 21:53:32 +0100
Subject: [PATCH] fix: utf8_truncate() handling of zero-width chars + test

---
 src/dwarfs/util.cpp |  2 +-
 test/utils_test.cpp | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/dwarfs/util.cpp b/src/dwarfs/util.cpp
index f7da89879..c37913c76 100644
--- a/src/dwarfs/util.cpp
+++ b/src/dwarfs/util.cpp
@@ -224,7 +224,7 @@ void utf8_truncate(std::string& str, size_t len) {
   char const* const e = p + str.size();
   size_t l = 0;
 
-  while (p < e && l < len) {
+  while (p < e && l <= len) {
     auto np = p;
     auto cp = utf8::next(np, e);
     l += dwarfs_wcwidth(cp);
diff --git a/test/utils_test.cpp b/test/utils_test.cpp
index 6cf29808a..22138ba3c 100644
--- a/test/utils_test.cpp
+++ b/test/utils_test.cpp
@@ -47,6 +47,34 @@ TEST(utils, utf8_display_width) {
       u8"unicode/我爱你/☀️ Sun/Γειά σας/مرحبًا/⚽️/Карибського")));
 }
 
+TEST(utils, uft8_truncate) {
+  auto u8trunc = [](std::u8string str, size_t len) {
+    auto tmp = u8string_to_string(str);
+    utf8_truncate(tmp, len);
+    return string_to_u8string(tmp);
+  };
+
+  // -----------------123456789012345--
+  auto const str = u8"我爱你/مرحبًا/⚽️";
+
+  EXPECT_EQ(str, u8trunc(str, 15));
+  // ----------123456789012345--
+  EXPECT_EQ(u8"我爱你/مرحبًا/", u8trunc(str, 14));
+  EXPECT_EQ(u8"我爱你/مرحبًا/", u8trunc(str, 13));
+  EXPECT_EQ(u8"我爱你/مرحبًا", u8trunc(str, 12));
+  EXPECT_EQ(u8"我爱你/مرحبً", u8trunc(str, 11));
+  EXPECT_EQ(u8"我爱你/مرح", u8trunc(str, 10));
+  EXPECT_EQ(u8"我爱你/مر", u8trunc(str, 9));
+  EXPECT_EQ(u8"我爱你/م", u8trunc(str, 8));
+  EXPECT_EQ(u8"我爱你/", u8trunc(str, 7));
+  EXPECT_EQ(u8"我爱你", u8trunc(str, 6));
+  EXPECT_EQ(u8"我爱", u8trunc(str, 5));
+  EXPECT_EQ(u8"我爱", u8trunc(str, 4));
+  EXPECT_EQ(u8"我", u8trunc(str, 3));
+  EXPECT_EQ(u8"我", u8trunc(str, 2));
+  EXPECT_EQ(u8"", u8trunc(str, 1));
+}
+
 TEST(utils, shorten_path_ascii) {
   std::string const orig =
       "/foo/bar/home/bla/mnt/doc/html/boost_asio/reference/"