Skip to content

Commit

Permalink
Implement V8::String::{Utf8Length, IsOneByte, `ContainsOnlyOneB…
Browse files Browse the repository at this point in the history
…yte`, `IsExternal`, `IsExternalTwoByte`, `IsExternalOneByte`} (#13417)
  • Loading branch information
Jarred-Sumner authored Aug 20, 2024
1 parent eb8ed27 commit f16d802
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 3 deletions.
74 changes: 71 additions & 3 deletions src/bun.js/bindings/v8/V8String.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "V8String.h"

#include "V8HandleScope.h"
#include "wtf/SIMDUTF.h"

using JSC::JSString;

Expand Down Expand Up @@ -30,6 +31,75 @@ MaybeLocal<String> String::NewFromUtf8(Isolate* isolate, char const* data, NewSt
return MaybeLocal<String>(isolate->globalInternals()->currentHandleScope()->createLocal<String>(jsString));
}

// Computes how many bytes this string occupies once encoded as UTF-8.
// The empty string short-circuits to 0; otherwise the count is produced by
// simdutf from either the 8-bit or the 16-bit character span of the string.
int String::Utf8Length(Isolate* isolate) const
{
    auto string = localToObjectPointer<JSString>();
    if (!string->length()) {
        return 0;
    }

    auto view = string->view(isolate->globalObject());
    size_t byteCount = 0;
    if (view->is8Bit()) {
        // 8-bit storage is handed to simdutf as Latin-1 bytes.
        const auto latin1 = view->span8();
        byteCount = simdutf::utf8_length_from_latin1(reinterpret_cast<const char*>(latin1.data()), latin1.size());
    } else {
        const auto utf16 = view->span16();
        byteCount = simdutf::utf8_length_from_utf16(utf16.data(), utf16.size());
    }
    return static_cast<int>(byteCount);
}

bool String::IsOneByte() const
{
auto jsString = localToObjectPointer<JSString>();
if (jsString->length() == 0) {
return true;
}
auto impl = jsString->tryGetValue();
return impl->is8Bit();
}

bool String::ContainsOnlyOneByte() const
{
auto jsString = localToObjectPointer<JSString>();
if (jsString->length() == 0) {
return true;
}
auto impl = jsString->tryGetValue();
return impl->containsOnlyLatin1();
}

bool String::IsExternal() const
{
auto jsString = localToObjectPointer<JSString>();
if (jsString->length() == 0) {
return false;
}
auto impl = jsString->tryGetValue();
return !impl->isNull() && impl->impl()->isExternal();
}

bool String::IsExternalTwoByte() const
{
auto jsString = localToObjectPointer<JSString>();
if (jsString->length() == 0) {
return false;
}
auto impl = jsString->tryGetValue();
return !impl->isNull() && impl->impl()->isExternal() && !impl->is8Bit();
}

bool String::IsExternalOneByte() const
{
auto jsString = localToObjectPointer<JSString>();
if (jsString->length() == 0) {
return false;
}
auto impl = jsString->tryGetValue();
return !impl->isNull() && impl->impl()->isExternal() && impl->is8Bit();
}

// Encoder entry points with C linkage; their definitions are not in this file.
// One overload takes 8-bit (LChar) source characters, the other 16-bit (UChar).
// NOTE(review): presumably these encode the source into `ptr` (capacity `len`)
// and return the number of bytes written — confirm against the implementation.
extern "C" size_t TextEncoder__encodeInto8(const LChar* stringPtr, size_t stringLen, void* ptr, size_t len);
extern "C" size_t TextEncoder__encodeInto16(const UChar* stringPtr, size_t stringLen, void* ptr, size_t len);

Expand Down Expand Up @@ -69,9 +139,7 @@ int String::WriteUtf8(Isolate* isolate, char* buffer, int length, int* nchars_re
// Returns the length of this string as reported by the underlying JSString.
//
// The length is read directly from the JSString rather than by first building
// a WTF::String through getString(): only the length is needed, so there is no
// reason to obtain the full string value (or the current isolate's global
// object) just to answer this query. This also matches how the other String
// queries in this file read the length.
int String::Length() const
{
    auto jsString = localToObjectPointer<JSString>();
    // JSString::length() is unsigned; the V8 API returns int.
    return static_cast<int>(jsString->length());
}

}
36 changes: 36 additions & 0 deletions src/bun.js/bindings/v8/V8String.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,42 @@ class String : Primitive {
BUN_EXPORT int WriteUtf8(Isolate* isolate, char* buffer, int length = -1, int* nchars_ref = nullptr, int options = NO_OPTIONS) const;
/**
* Returns the length of this string, converted to int.
*/
BUN_EXPORT int Length() const;

/**
* Returns the number of bytes in the UTF-8 encoded
* representation of this string.
*
* @param isolate isolate whose global object is used to obtain a view of
*                the string's characters.
* @return the UTF-8 byte count, converted to int; 0 for an empty string.
*/
BUN_EXPORT int Utf8Length(Isolate* isolate) const;

/**
* Returns whether this string is known to contain only one byte data,
* i.e. ISO-8859-1 code points.
* Does not read the string.
* False negatives are possible.
* An empty string is always reported as one-byte.
*/
BUN_EXPORT bool IsOneByte() const;

/**
* Returns whether this string contains only one byte data,
* i.e. ISO-8859-1 code points.
* Will read the entire string in some cases.
* An empty string is always reported as containing only one byte data.
*/
BUN_EXPORT bool ContainsOnlyOneByte() const;

/**
* Returns true if the string is external.
* Always false for an empty string, or when the string's value is null.
*/
BUN_EXPORT bool IsExternal() const;

/**
* Returns true if the string is both external and two-byte.
* Always false for an empty string, or when the string's value is null.
*/
BUN_EXPORT bool IsExternalTwoByte() const;

/**
* Returns true if the string is both external and one-byte.
* Always false for an empty string, or when the string's value is null.
*/
BUN_EXPORT bool IsExternalOneByte() const;

JSC::JSString* localToJSString()
{
return localToObjectPointer<JSC::JSString>();
Expand Down
12 changes: 12 additions & 0 deletions src/napi/napi.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1744,6 +1744,12 @@ const V8API = if (!bun.Environment.isWindows) struct {
pub extern fn _ZNK2v85Value8IsStringEv() *anyopaque;
pub extern fn _ZN2v87Boolean3NewEPNS_7IsolateEb() *anyopaque;
pub extern fn _ZN2v86Object16GetInternalFieldEi() *anyopaque;
pub extern fn _ZNK2v86String10Utf8LengthEPNS_7IsolateE() *anyopaque;
pub extern fn _ZNK2v86String10IsExternalEv() *anyopaque;
pub extern fn _ZNK2v86String17IsExternalOneByteEv() *anyopaque;
pub extern fn _ZNK2v86String17IsExternalTwoByteEv() *anyopaque;
pub extern fn _ZNK2v86String9IsOneByteEv() *anyopaque;
pub extern fn _ZNK2v86String19ContainsOnlyOneByteEv() *anyopaque;
} else struct {
// MSVC name mangling is different than it is on unix.
// To make this easier to deal with, I have provided a script to generate the list of functions.
Expand Down Expand Up @@ -1800,6 +1806,12 @@ const V8API = if (!bun.Environment.isWindows) struct {
pub extern fn @"?IsString@Value@v8@@QEBA_NXZ"() *anyopaque;
pub extern fn @"?New@Boolean@v8@@SA?AV?$Local@VBoolean@v8@@@2@PEAVIsolate@2@_N@Z"() *anyopaque;
pub extern fn @"?GetInternalField@Object@v8@@QEAA?AV?$Local@VData@v8@@@2@H@Z"() *anyopaque;
pub extern fn @"?IsExternal@String@v8@@QEBA_NXZ"() *anyopaque;
pub extern fn @"?IsExternalOneByte@String@v8@@QEBA_NXZ"() *anyopaque;
pub extern fn @"?IsExternalTwoByte@String@v8@@QEBA_NXZ"() *anyopaque;
pub extern fn @"?IsOneByte@String@v8@@QEBA_NXZ"() *anyopaque;
pub extern fn @"?Utf8Length@String@v8@@QEBAHPEAVIsolate@2@@Z"() *anyopaque;
pub extern fn @"?ContainsOnlyOneByte@String@v8@@QEBA_NXZ"() *anyopaque;
};

pub fn fixDeadCodeElimination() void {
Expand Down
6 changes: 6 additions & 0 deletions src/symbols.def
Original file line number Diff line number Diff line change
Expand Up @@ -616,3 +616,9 @@ EXPORTS
?IsString@Value@v8@@QEBA_NXZ
?New@Boolean@v8@@SA?AV?$Local@VBoolean@v8@@@2@PEAVIsolate@2@_N@Z
?GetInternalField@Object@v8@@QEAA?AV?$Local@VData@v8@@@2@H@Z
?IsExternal@String@v8@@QEBA_NXZ
?IsExternalOneByte@String@v8@@QEBA_NXZ
?IsExternalTwoByte@String@v8@@QEBA_NXZ
?IsOneByte@String@v8@@QEBA_NXZ
?Utf8Length@String@v8@@QEBAHPEAVIsolate@2@@Z
?ContainsOnlyOneByte@String@v8@@QEBA_NXZ
6 changes: 6 additions & 0 deletions src/symbols.dyn
Original file line number Diff line number Diff line change
Expand Up @@ -203,4 +203,10 @@
__ZNK2v85Value8IsStringEv;
__ZN2v87Boolean3NewEPNS_7IsolateEb;
__ZN2v86Object16GetInternalFieldEi;
__ZNK2v86String10Utf8LengthEPNS_7IsolateE;
__ZNK2v86String10IsExternalEv;
__ZNK2v86String17IsExternalOneByteEv;
__ZNK2v86String17IsExternalTwoByteEv;
__ZNK2v86String9IsOneByteEv;
__ZNK2v86String19ContainsOnlyOneByteEv;
};
6 changes: 6 additions & 0 deletions src/symbols.txt
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,9 @@ __ZNK2v85Value7IsFalseEv
__ZNK2v85Value8IsStringEv
__ZN2v87Boolean3NewEPNS_7IsolateEb
__ZN2v86Object16GetInternalFieldEi
__ZNK2v86String10Utf8LengthEPNS_7IsolateE
__ZNK2v86String10IsExternalEv
__ZNK2v86String17IsExternalOneByteEv
__ZNK2v86String17IsExternalTwoByteEv
__ZNK2v86String9IsOneByteEv
__ZNK2v86String19ContainsOnlyOneByteEv
11 changes: 11 additions & 0 deletions test/v8/v8-module/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,17 @@ static bool perform_string_test(const FunctionCallbackInfo<Value> &info,
return false;
}

if (v8_string->Utf8Length(isolate) != encoded_utf_8_length) {
fail(info, "String::Utf8Length returned wrong length: expected %d got %d",
encoded_utf_8_length, v8_string->Utf8Length(isolate));
return false;
}

if (v8_string->IsExternal()) {
fail(info, "String::IsExternal returned true");
return false;
}

ok(info);
return true;
}
Expand Down

0 comments on commit f16d802

Please sign in to comment.