Skip to content

Commit

Permalink
try to calculate character width
Browse files Browse the repository at this point in the history
  • Loading branch information
Philipp Otterbein committed Dec 27, 2024
1 parent 6a23803 commit 9182545
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 29 deletions.
16 changes: 16 additions & 0 deletions packaging/dependencies.nix
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,22 @@ scope: {
meta.platforms = lib.platforms.all;
});

# Packages the single header widechar_width.h from ridiculousfish/widecharwidth
# so that it can be found under $out/include.
widecharwidth = stdenv.mkDerivation {
name = "widecharwidth";
# The configure and build phases are skipped; the install phase below only
# copies one header out of the source tree.
dontConfigure = true;
dontBuild = true;
installPhase = ''
mkdir -p $out/include
cp $src/widechar_width.h $out/include
'';
# The source is pinned to an exact upstream commit together with its hash.
src = pkgs.fetchFromGitHub {
owner = "ridiculousfish";
repo = "widecharwidth";
rev = "533e50efb0b9b122a08f2273337dbf6b44b03cc7";
hash = "sha256-Vy1jCv0wqV/4sNCQIYGKiHq5A8QGE6Q+1v8k3Cn6sJ4=";
};
};

inherit resolvePath filesetToSource;

mkMesonDerivation =
Expand Down
4 changes: 4 additions & 0 deletions src/libutil-tests/terminal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ TEST(filterANSIEscapes, utf8)
ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó");
ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b");
ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 6), "f🔍bar");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 3), "f🔍");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 2), "f");
ASSERT_EQ(filterANSIEscapes("foo\u0301", true, 3), "foó");
}

TEST(filterANSIEscapes, osc8)
Expand Down
3 changes: 3 additions & 0 deletions src/libutil/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ deps_private += cpuid
nlohmann_json = dependency('nlohmann_json', version : '>= 3.9')
deps_public += nlohmann_json

# Probe for widechar_width.h with the C++ compiler; `required : true` makes
# configuration fail if the header cannot be found.
cxx = meson.get_compiler('cpp')
cxx.has_header('widechar_width.h', required : true)

config_h = configure_file(
configuration : configdata,
output : 'config-util.hh',
Expand Down
2 changes: 2 additions & 0 deletions src/libutil/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
, libsodium
, nlohmann_json
, openssl
, widecharwidth

# Configuration Options

Expand Down Expand Up @@ -42,6 +43,7 @@ mkMesonLibrary (finalAttrs: {
brotli
libsodium
openssl
widecharwidth
] ++ lib.optional stdenv.hostPlatform.isx86_64 libcpuid
;

Expand Down
90 changes: 61 additions & 29 deletions src/libutil/terminal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,53 @@
# include <sys/ioctl.h>
#endif
#include <unistd.h>
#include <widechar_width.h>

namespace {

/**
 * Decode the first UTF-8 encoded character of `s` and estimate how many
 * terminal columns it occupies.
 *
 * @param s Text to inspect; only the leading character is examined.
 * @return `{width, bytes}` where `width` is the estimated column count and
 *         `bytes` is the number of input bytes the caller should consume.
 *
 * Malformed input is never rejected; it is accounted for as one column per
 * byte so that the caller always makes progress:
 *  - an invalid start byte yields `{1, 1}`;
 *  - a truncated sequence or a bad continuation byte at position `i` yields
 *    `{i, i}`, i.e. only the bytes seen so far are consumed;
 *  - an overlong encoding or a value above U+10FFFF yields `{bytes, bytes}`.
 *
 * An empty `s` yields `{0, 0}`. Callers that advance by `bytes` must
 * therefore not call this function at the end of their input.
 */
inline std::pair<int, size_t> charWidthUTF8Helper(std::string_view s)
{
    if (s.empty())
        return {0, 0};

    // Go through unsigned char so a high-bit byte is not sign-extended.
    uint32_t ch = static_cast<unsigned char>(s[0]);
    size_t bytes = 1;
    // Valid code points for a sequence of `bytes` bytes lie in [min, max).
    uint32_t min = 0;
    uint32_t max = 1U << 7;

    if ((ch & 0x80U) == 0U) {
        // Plain ASCII: the byte is the code point.
    } else if ((ch & 0xe0U) == 0xc0U) {
        ch &= 0x1fU;
        bytes = 2;
        min = 1U << 7;
        max = 1U << 11;
    } else if ((ch & 0xf0U) == 0xe0U) {
        ch &= 0x0fU;
        bytes = 3;
        min = 1U << 11;
        max = 1U << 16;
    } else if ((ch & 0xf8U) == 0xf0U) {
        ch &= 0x07U;
        bytes = 4;
        min = 1U << 16;
        max = 0x110000U;
    } else {
        return {1, 1}; // invalid UTF-8 start byte
    }

    for (size_t i = 1; i < bytes; i++) {
        if (i < s.size() && (s[i] & 0xc0) == 0x80) {
            ch = (ch << 6) | (s[i] & 0x3f);
        } else {
            // Invalid UTF-8 encoding; assume one character per byte.
            return {static_cast<int>(i), i};
        }
    }

    // Overlong encodings (value representable in fewer bytes) and values
    // beyond U+10FFFF are not valid characters: count one column per byte.
    if (ch < min || ch >= max)
        return {static_cast<int>(bytes), bytes};

    // widechar_wcwidth() reports special classes as negative constants.
    int width = widechar_wcwidth(ch);
    if (width == widechar_ambiguous) {
        width = 1; // just a guess...
    } else if (width == widechar_widened_in_9) {
        width = 2;
    } else if (width < 0) {
        width = 0; // every other special class takes no column
    }
    return {width, bytes};
}

}

namespace nix {

Expand All @@ -30,7 +77,7 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
size_t w = 0;
auto i = s.begin();

while (w < (size_t) width && i != s.end()) {
while (i != s.end()) {

if (*i == '\e') {
std::string e;
Expand Down Expand Up @@ -61,46 +108,31 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
}

else if (*i == '\t') {
i++; t += ' '; w++;
while (w < (size_t) width && w % 8) {
t += ' '; w++;
}
do {
if (++w > (size_t) width)
return t;
t += ' ';
} while (w % 8);
i++;
}

else if (*i == '\r' || *i == '\a')
// do nothing for now
i++;

else {
w++;
// Copy one UTF-8 character.
if ((*i & 0xe0) == 0xc0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
} else if ((*i & 0xf0) == 0xe0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
} else if ((*i & 0xf8) == 0xf0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
}
} else
t += *i++;
auto [chWidth, bytes] = charWidthUTF8Helper({i, s.end()});
w += chWidth;
if (w > (size_t) width) {
break;
}
t += {i, i + bytes};
i += bytes;
}
}

return t;
}


//////////////////////////////////////////////////////////////////////

static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}};
Expand Down

0 comments on commit 9182545

Please sign in to comment.