Skip to content

Commit

Permalink
try to calculate character width
Browse files Browse the repository at this point in the history
  • Loading branch information
Philipp Otterbein authored and Mic92 committed Dec 28, 2024
1 parent 6a23803 commit 6fa3984
Show file tree
Hide file tree
Showing 6 changed files with 1,635 additions and 29 deletions.
4 changes: 4 additions & 0 deletions src/libutil-tests/terminal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ TEST(filterANSIEscapes, utf8)
ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó");
ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b");
ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 6), "f🔍bar");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 3), "f🔍");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 2), "f");
ASSERT_EQ(filterANSIEscapes("foo\u0301", true, 3), "foó");
}

TEST(filterANSIEscapes, osc8)
Expand Down
6 changes: 6 additions & 0 deletions src/libutil/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ deps_private += cpuid
nlohmann_json = dependency('nlohmann_json', version : '>= 3.9')
deps_public += nlohmann_json

cxx = meson.get_compiler('cpp')

config_h = configure_file(
configuration : configdata,
output : 'config-util.hh',
Expand Down Expand Up @@ -168,6 +170,10 @@ sources = files(
)

include_dirs = [include_directories('.')]
# Probe the C++ compiler for a widechar_width.h it can already find. The probe
# is non-fatal (required : false); when it fails, the widecharwidth
# subdirectory is appended to the include path so the bundled copy is used.
if not cxx.has_header('widechar_width.h', required : false)
# use vendored widechar_width.h
include_dirs += include_directories('./widecharwidth')
endif

headers = [config_h] + files(
'abstract-setting-to-json.hh',
Expand Down
1 change: 1 addition & 0 deletions src/libutil/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mkMesonLibrary (finalAttrs: {
./nix-meson-build-support
../../.version
./.version
./widecharwidth
./meson.build
./meson.options
./linux/meson.build
Expand Down
90 changes: 61 additions & 29 deletions src/libutil/terminal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,53 @@
# include <sys/ioctl.h>
#endif
#include <unistd.h>
#include <widechar_width.h>

namespace {

/**
 * Decode the UTF-8 sequence at the start of `s` and estimate how many
 * terminal columns it occupies.
 *
 * @param s  Text to inspect; only the first encoded character is examined.
 * @return   `{width, bytes}`: the estimated display width in columns and the
 *           number of bytes of `s` that belong to the character.
 *           Malformed input (invalid lead byte, missing or invalid
 *           continuation byte, overlong form, value above U+10FFFF) is
 *           counted as one column per byte consumed.
 *           An empty `s` yields `{0, 0}`; callers that advance by `bytes`
 *           must therefore not call this at end of input.
 */
inline std::pair<int, size_t> charWidthUTF8Helper(std::string_view s)
{
    if (s.empty()) {
        return {0, 0}; // nothing to decode; never read s[0] out of bounds
    }

    // Convert through unsigned char so bytes >= 0x80 are not sign-extended.
    uint32_t ch = static_cast<unsigned char>(s[0]);
    size_t bytes = 1;
    // A well-formed sequence of `bytes` bytes encodes a value in [min, max).
    // Anything below `min` is an overlong form (RFC 3629 forbids those).
    uint32_t min = 0;
    uint32_t max = 1U << 7;

    if ((ch & 0x80U) == 0U) {
        // single-byte (ASCII) character
    } else if ((ch & 0xe0U) == 0xc0U) {
        ch &= 0x1fU;
        bytes = 2;
        min = 1U << 7;
        max = 1U << 11;
    } else if ((ch & 0xf0U) == 0xe0U) {
        ch &= 0x0fU;
        bytes = 3;
        min = 1U << 11;
        max = 1U << 16;
    } else if ((ch & 0xf8U) == 0xf0U) {
        ch &= 0x07U;
        bytes = 4;
        min = 1U << 16;
        max = 0x110000U; // one past the last Unicode code point
    } else {
        return {1, 1}; // invalid UTF-8 start byte
    }

    for (size_t i = 1; i < bytes; i++) {
        if (i >= s.size()) {
            return {static_cast<int>(i), i}; // truncated sequence
        }
        const uint32_t cont = static_cast<unsigned char>(s[i]);
        if ((cont & 0xc0U) != 0x80U) {
            // invalid UTF-8 encoding; assume one character per byte
            return {static_cast<int>(i), i};
        }
        ch = (ch << 6) | (cont & 0x3fU);
    }

    if (ch < min || ch >= max) {
        // Overlong or out-of-range value: one column per byte.
        return {static_cast<int>(bytes), bytes};
    }

    int width = widechar_wcwidth(ch);
    if (width == widechar_ambiguous) {
        width = 1; // just a guess...
    } else if (width == widechar_widened_in_9) {
        width = 2;
    } else if (width < 0) {
        // Every other negative result is treated as occupying no column.
        width = 0;
    }
    return {width, bytes};
}

}

namespace nix {

Expand All @@ -30,7 +77,7 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
size_t w = 0;
auto i = s.begin();

while (w < (size_t) width && i != s.end()) {
while (i != s.end()) {

if (*i == '\e') {
std::string e;
Expand Down Expand Up @@ -61,46 +108,31 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
}

else if (*i == '\t') {
i++; t += ' '; w++;
while (w < (size_t) width && w % 8) {
t += ' '; w++;
}
do {
if (++w > (size_t) width)
return t;
t += ' ';
} while (w % 8);
i++;
}

else if (*i == '\r' || *i == '\a')
// do nothing for now
i++;

else {
w++;
// Copy one UTF-8 character.
if ((*i & 0xe0) == 0xc0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
} else if ((*i & 0xf0) == 0xe0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
} else if ((*i & 0xf8) == 0xf0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
}
} else
t += *i++;
auto [chWidth, bytes] = charWidthUTF8Helper({i, s.end()});
w += chWidth;
if (w > (size_t) width) {
break;
}
t += {i, i + bytes};
i += bytes;
}
}

return t;
}


//////////////////////////////////////////////////////////////////////

static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}};
Expand Down
4 changes: 4 additions & 0 deletions src/libutil/widecharwidth/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
widecharwidth - wcwidth implementation
Written in 2018 by ridiculous_fish
To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
Loading

0 comments on commit 6fa3984

Please sign in to comment.