Skip to content

Commit

Permalink
fix: utf8_truncate() handling of zero-width chars + test
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Jan 1, 2024
1 parent 3575bfb commit 4ab808c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/dwarfs/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ void utf8_truncate(std::string& str, size_t len) {
char const* const e = p + str.size();
size_t l = 0;

while (p < e && l < len) {
while (p < e && l <= len) {
auto np = p;
auto cp = utf8::next(np, e);
l += dwarfs_wcwidth(cp);
Expand Down
28 changes: 28 additions & 0 deletions test/utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,34 @@ TEST(utils, utf8_display_width) {
u8"unicode/我爱你/☀️ Sun/Γειά σας/مرحبًا/⚽️/Карибського")));
}

// Checks utf8_truncate() against a string that mixes double-width CJK
// characters, Arabic text containing a zero-width combining mark, and an
// emoji followed by a zero-width variation selector. Every target width
// from 15 down to 1 is exercised.
//
// NOTE(review): the test name is spelled "uft8" (sic); it is left unchanged
// here so that existing --gtest_filter patterns keep matching.
TEST(utils, uft8_truncate) {
  // Round-trips through std::string because utf8_truncate() modifies a
  // std::string in place, while the literals below are char8_t based.
  auto u8trunc = [](std::u8string str, size_t len) {
    auto tmp = u8string_to_string(str);
    utf8_truncate(tmp, len);
    return string_to_u8string(tmp);
  };

  // The rulers below count display columns, not code points or bytes.
  // -----------------123456789012345--
  auto const str = u8"我爱你/مرحبًا/⚽️";

  // The whole string fits into 15 columns and must come back unchanged,
  // including the trailing zero-width variation selector.
  EXPECT_EQ(str, u8trunc(str, 15));
  // ----------123456789012345--
  // The emoji needs two columns, so 14 and 13 both stop after the slash.
  EXPECT_EQ(u8"我爱你/مرحبًا/", u8trunc(str, 14));
  EXPECT_EQ(u8"我爱你/مرحبًا/", u8trunc(str, 13));
  EXPECT_EQ(u8"我爱你/مرحبًا", u8trunc(str, 12));
  // The zero-width mark must stay attached to the letter that precedes it.
  EXPECT_EQ(u8"我爱你/مرحبً", u8trunc(str, 11));
  EXPECT_EQ(u8"我爱你/مرح", u8trunc(str, 10));
  EXPECT_EQ(u8"我爱你/مر", u8trunc(str, 9));
  EXPECT_EQ(u8"我爱你/م", u8trunc(str, 8));
  EXPECT_EQ(u8"我爱你/", u8trunc(str, 7));
  // Each CJK character occupies two columns, so an odd width leaves one
  // column unused instead of splitting a character.
  EXPECT_EQ(u8"我爱你", u8trunc(str, 6));
  EXPECT_EQ(u8"我爱", u8trunc(str, 5));
  EXPECT_EQ(u8"我爱", u8trunc(str, 4));
  // One double-width character fits into 3 or 2 columns; only a width of 1
  // is too narrow for it and yields the empty string.
  EXPECT_EQ(u8"我", u8trunc(str, 3));
  EXPECT_EQ(u8"我", u8trunc(str, 2));
  EXPECT_EQ(u8"", u8trunc(str, 1));
}

TEST(utils, shorten_path_ascii) {
std::string const orig =
"/foo/bar/home/bla/mnt/doc/html/boost_asio/reference/"
Expand Down

0 comments on commit 4ab808c

Please sign in to comment.