Skip to content

Commit

Permalink
try to calculate character width
Browse files Browse the repository at this point in the history
  • Loading branch information
Philipp Otterbein committed Dec 17, 2024
1 parent 6a23803 commit 4268a45
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 24 deletions.
16 changes: 16 additions & 0 deletions packaging/dependencies.nix
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,22 @@ scope: {
meta.platforms = lib.platforms.all;
});

# Header-only package: installs widechar_width.h from the
# ridiculousfish/widecharwidth repository into $out/include.
widecharwidth = stdenv.mkDerivation {
  name = "widecharwidth";

  # Upstream sources, pinned to a fixed revision and content hash.
  src = pkgs.fetchFromGitHub {
    owner = "ridiculousfish";
    repo = "widecharwidth";
    rev = "533e50efb0b9b122a08f2273337dbf6b44b03cc7";
    hash = "sha256-Vy1jCv0wqV/4sNCQIYGKiHq5A8QGE6Q+1v8k3Cn6sJ4=";
  };

  # Configure and build phases are skipped; only the header is copied.
  dontConfigure = true;
  dontBuild = true;

  installPhase = ''
    mkdir -p $out/include
    cp $src/widechar_width.h $out/include
  '';
};

inherit resolvePath filesetToSource;

mkMesonDerivation =
Expand Down
3 changes: 3 additions & 0 deletions src/libutil/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ deps_private += cpuid
nlohmann_json = dependency('nlohmann_json', version : '>= 3.9')
deps_public += nlohmann_json

# terminal.cc includes <widechar_width.h>; `required : true` makes the
# configure step fail if the C++ compiler cannot find that header.
cxx = meson.get_compiler('cpp')
cxx.has_header('widechar_width.h', required : true)

config_h = configure_file(
configuration : configdata,
output : 'config-util.hh',
Expand Down
2 changes: 2 additions & 0 deletions src/libutil/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
, libsodium
, nlohmann_json
, openssl
, widecharwidth

# Configuration Options

Expand Down Expand Up @@ -42,6 +43,7 @@ mkMesonLibrary (finalAttrs: {
brotli
libsodium
openssl
widecharwidth
] ++ lib.optional stdenv.hostPlatform.isx86_64 libcpuid
;

Expand Down
78 changes: 54 additions & 24 deletions src/libutil/terminal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,54 @@
# include <sys/ioctl.h>
#endif
#include <unistd.h>
#include <widechar_width.h>

namespace {

/**
 * Decode the UTF-8 sequence at the start of `s` and estimate the number
 * of terminal columns it occupies.
 *
 * @param s  Bytes to inspect; only the leading sequence is examined.
 * @return   `{width, bytes}`: the estimated display width in columns and
 *           the number of bytes the caller should consume.
 *           - Empty input yields `{0, 0}`.
 *           - An invalid start byte yields `{1, 1}`.
 *           - A truncated or malformed sequence yields `{i, i}`, where `i`
 *             is the number of bytes that were valid so far (one column
 *             per byte), so the offending byte is re-examined by the next
 *             call.
 *           - A decoded value outside the Unicode range yields
 *             `{bytes, bytes}`.
 *
 * NOTE(review): overlong encodings are not rejected here; they are decoded
 * and passed to `widechar_wcwidth` like any other code point.
 */
inline std::pair<int, size_t> charWidthUTF8Helper(std::string_view s)
{
    if (s.empty()) {
        return {0, 0}; // nothing to decode; avoids reading s[0] out of bounds
    }

    // Work on an unsigned copy so that the bit tests do not depend on the
    // signedness of plain `char`.
    const auto lead = static_cast<unsigned char>(s[0]);

    size_t bytes = 1;
    uint32_t ch = 0;
    uint32_t max = 1U << 7; // exclusive upper bound for the decoded value
    if ((lead & 0x80) == 0) {
        ch = lead;
    } else if ((lead & 0xe0) == 0xc0) {
        ch = lead & 0x1f;
        bytes = 2;
        max = 1U << 11;
    } else if ((lead & 0xf0) == 0xe0) {
        ch = lead & 0x0f;
        bytes = 3;
        max = 1U << 16;
    } else if ((lead & 0xf8) == 0xf0) {
        ch = lead & 0x07;
        bytes = 4;
        max = 0x110000U; // one past the last Unicode code point
    } else {
        return {1, 1}; // invalid UTF-8 start byte
    }

    const size_t len = s.size();
    for (size_t i = 1; i < bytes; i++) {
        // A continuation byte must match 10xxxxxx. Testing only the high
        // bit would also accept lead bytes (11xxxxxx) and swallow the start
        // of the next character.
        if (i < len && (static_cast<unsigned char>(s[i]) & 0xc0) == 0x80) {
            ch = (ch << 6) | (static_cast<unsigned char>(s[i]) & 0x3f);
        } else {
            // invalid UTF-8 encoding; assume one character per byte
            return {static_cast<int>(i), i};
        }
    }

    // Fallback of one column per byte, used when the decoded value is out
    // of range (only reachable for 4-byte sequences >= 0x110000).
    int width = static_cast<int>(bytes);
    if (ch < max) {
        width = widechar_wcwidth(ch);
        if (width == widechar_ambiguous) {
            width = 1; // just a guess...
        } else if (width == widechar_widened_in_9) {
            width = 2;
        }
        // Any remaining negative result is treated as zero columns.
        width = std::max(width, 0);
    }
    return {width, bytes};
}

}

namespace nix {

Expand Down Expand Up @@ -72,35 +120,17 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
i++;

else {
w++;
// Copy one UTF-8 character.
if ((*i & 0xe0) == 0xc0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
} else if ((*i & 0xf0) == 0xe0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
} else if ((*i & 0xf8) == 0xf0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
}
} else
t += *i++;
auto [chWidth, bytes] = charWidthUTF8Helper({i, s.end()});
w += chWidth;
if (w <= (size_t) width) {
t += {i, i + bytes};
}
i += bytes;
}
}

return t;
}


//////////////////////////////////////////////////////////////////////

static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}};
Expand Down

0 comments on commit 4268a45

Please sign in to comment.