diff --git a/docs/runtime/nodejs-apis.md b/docs/runtime/nodejs-apis.md index 91a8b61a203d93..ef201aa2f2f1d1 100644 --- a/docs/runtime/nodejs-apis.md +++ b/docs/runtime/nodejs-apis.md @@ -413,7 +413,7 @@ The table below lists all globals implemented by Node.js and Bun's current compa ### [`TextDecoderStream`](https://developer.mozilla.org/en-US/docs/Web/API/TextDecoderStream) -🔴 Not implemented. +🟢 Fully implemented. ### [`TextEncoder`](https://developer.mozilla.org/en-US/docs/Web/API/TextEncoder) @@ -421,7 +421,7 @@ The table below lists all globals implemented by Node.js and Bun's current compa ### [`TextEncoderStream`](https://developer.mozilla.org/en-US/docs/Web/API/TextEncoderStream) -🔴 Not implemented. +🟢 Fully implemented. ### [`TransformStream`](https://developer.mozilla.org/en-US/docs/Web/API/TransformStream) diff --git a/src/bun.js/base.zig b/src/bun.js/base.zig index 0a47be532501fa..ec0ed774a0c9cd 100644 --- a/src/bun.js/base.zig +++ b/src/bun.js/base.zig @@ -405,6 +405,16 @@ pub const ArrayBuffer = extern struct { return Bun__createUint8ArrayForCopy(globalThis, bytes.ptr, bytes.len, true); } + extern "C" fn Bun__allocUint8ArrayForCopy(*JSC.JSGlobalObject, usize, **anyopaque) JSValue; + pub fn allocBuffer(globalThis: *JSC.JSGlobalObject, len: usize) struct { JSValue, []u8 } { + var ptr: [*]u8 = undefined; + const buffer = Bun__allocUint8ArrayForCopy(globalThis, len, @ptrCast(&ptr)); + if (buffer.isEmpty()) { + return .{ buffer, &.{} }; + } + return .{ buffer, ptr[0..len] }; + } + extern "C" fn Bun__createUint8ArrayForCopy(*JSC.JSGlobalObject, ptr: ?*const anyopaque, len: usize, buffer: bool) JSValue; extern "C" fn Bun__createArrayBufferForCopy(*JSC.JSGlobalObject, ptr: ?*const anyopaque, len: usize) JSValue; @@ -761,6 +771,7 @@ const TestScope = Test.TestScope; const NodeFS = JSC.Node.NodeFS; const TextEncoder = WebCore.TextEncoder; const TextDecoder = WebCore.TextDecoder; +const TextEncoderStreamEncoder = WebCore.TextEncoderStreamEncoder; const 
HTMLRewriter = JSC.Cloudflare.HTMLRewriter; const Element = JSC.Cloudflare.Element; const Comment = JSC.Cloudflare.Comment; diff --git a/src/bun.js/bindings/JSDOMWrapper.h b/src/bun.js/bindings/JSDOMWrapper.h index 4698f86d856ba6..bff0179a4586ac 100644 --- a/src/bun.js/bindings/JSDOMWrapper.h +++ b/src/bun.js/bindings/JSDOMWrapper.h @@ -84,6 +84,7 @@ class JSDOMWrapper : public JSDOMObject { using DOMWrapped = ImplementationClass; ImplementationClass& wrapped() const { return m_wrapped; } + Ref protectedWrapped() const { return m_wrapped; } static ptrdiff_t offsetOfWrapped() { return OBJECT_OFFSETOF(JSDOMWrapper, m_wrapped); } constexpr static bool hasCustomPtrTraits() { return !std::is_same_v>; }; diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp index d183cde4b881ca..ff684340fc202f 100644 --- a/src/bun.js/bindings/ZigGlobalObject.cpp +++ b/src/bun.js/bindings/ZigGlobalObject.cpp @@ -116,6 +116,8 @@ #include "JSSQLStatement.h" #include "JSStringDecoder.h" #include "JSTextEncoder.h" +#include "JSTextEncoderStream.h" +#include "JSTextDecoderStream.h" #include "JSTransformStream.h" #include "JSTransformStreamDefaultController.h" #include "JSURLSearchParams.h" @@ -1268,6 +1270,8 @@ WEBCORE_GENERATED_CONSTRUCTOR_GETTER(ReadableStreamDefaultController) WEBCORE_GENERATED_CONSTRUCTOR_GETTER(ReadableStreamDefaultReader) WEBCORE_GENERATED_CONSTRUCTOR_GETTER(SubtleCrypto); WEBCORE_GENERATED_CONSTRUCTOR_GETTER(TextEncoder); +WEBCORE_GENERATED_CONSTRUCTOR_GETTER(TextEncoderStream); +WEBCORE_GENERATED_CONSTRUCTOR_GETTER(TextDecoderStream); WEBCORE_GENERATED_CONSTRUCTOR_GETTER(TransformStream) WEBCORE_GENERATED_CONSTRUCTOR_GETTER(TransformStreamDefaultController) WEBCORE_GENERATED_CONSTRUCTOR_GETTER(URLSearchParams); @@ -1696,6 +1700,22 @@ extern "C" JSC__JSValue Bun__createArrayBufferForCopy(JSC::JSGlobalObject* globa RELEASE_AND_RETURN(scope, JSValue::encode(JSC::JSArrayBuffer::create(globalObject->vm(), 
globalObject->arrayBufferStructure(JSC::ArrayBufferSharingMode::Default), WTFMove(arrayBuffer)))); } +extern "C" JSC__JSValue Bun__allocUint8ArrayForCopy(JSC::JSGlobalObject* globalObject, size_t len, void** ptr) +{ + auto scope = DECLARE_THROW_SCOPE(globalObject->vm()); + + JSC::JSUint8Array* array = JSC::JSUint8Array::createUninitialized(globalObject, globalObject->m_typedArrayUint8.get(globalObject), len); + + if (UNLIKELY(!array)) { + JSC::throwOutOfMemoryError(globalObject, scope); + return encodedJSValue(); + } + + *ptr = array->vector(); + + return JSValue::encode(array); +} + extern "C" JSC__JSValue Bun__createUint8ArrayForCopy(JSC::JSGlobalObject* globalObject, const void* ptr, size_t len, bool isBuffer) { auto scope = DECLARE_THROW_SCOPE(globalObject->vm()); @@ -3362,6 +3382,7 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) GlobalPropertyInfo(builtinNames.internalModuleRegistryPrivateName(), this->internalModuleRegistry(), PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly), GlobalPropertyInfo(builtinNames.processBindingConstantsPrivateName(), this->processBindingConstants(), PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly), GlobalPropertyInfo(builtinNames.requireMapPrivateName(), this->requireMap(), PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly | 0), + GlobalPropertyInfo(builtinNames.TextEncoderStreamEncoderPrivateName(), JSTextEncoderStreamEncoderConstructor(), PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly | 0), }; addStaticGlobals(staticGlobals, std::size(staticGlobals)); diff --git a/src/bun.js/bindings/ZigGlobalObject.lut.txt b/src/bun.js/bindings/ZigGlobalObject.lut.txt index 91eb9d3e94c6fc..106143b94aaf5a 100644 --- a/src/bun.js/bindings/ZigGlobalObject.lut.txt +++ b/src/bun.js/bindings/ZigGlobalObject.lut.txt @@ -75,6 +75,8 @@ ReadableStreamDefaultReader ReadableStreamDefaultReaderConstructorCallback PropertyCallback SubtleCrypto SubtleCryptoConstructorCallback PropertyCallback TextEncoder 
TextEncoderConstructorCallback PropertyCallback + TextEncoderStream TextEncoderStreamConstructorCallback PropertyCallback + TextDecoderStream TextDecoderStreamConstructorCallback PropertyCallback TransformStream TransformStreamConstructorCallback PropertyCallback TransformStreamDefaultController TransformStreamDefaultControllerConstructorCallback PropertyCallback URL DOMURLConstructorCallback PropertyCallback diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig index 89de038e4bf5aa..a130a7b0463f9b 100644 --- a/src/bun.js/bindings/bindings.zig +++ b/src/bun.js/bindings/bindings.zig @@ -137,7 +137,7 @@ pub const ZigString = extern struct { pub fn dupeForJS(utf8: []const u8, allocator: std.mem.Allocator) !ZigString { if (try strings.toUTF16Alloc(allocator, utf8, false, false)) |utf16| { - var out = ZigString.init16(utf16); + var out = ZigString.initUTF16(utf16); out.mark(); out.markUTF16(); return out; @@ -629,8 +629,8 @@ pub const ZigString = extern struct { return shim.cppFn("toAtomicValue", .{ this, globalThis }); } - pub fn init16(slice_: []const u16) ZigString { - var out = ZigString{ ._unsafe_ptr_do_not_use = std.mem.sliceAsBytes(slice_).ptr, .len = slice_.len }; + pub fn initUTF16(items: []const u16) ZigString { + var out = ZigString{ ._unsafe_ptr_do_not_use = @ptrCast(items), .len = items.len }; out.markUTF16(); return out; } diff --git a/src/bun.js/bindings/bun-simdutf.cpp b/src/bun.js/bindings/bun-simdutf.cpp index 80a74fe4a3babf..ea6ff064165098 100644 --- a/src/bun.js/bindings/bun-simdutf.cpp +++ b/src/bun.js/bindings/bun-simdutf.cpp @@ -264,6 +264,10 @@ size_t simdutf__convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, { return simdutf::convert_valid_utf16be_to_utf32(buf, len, utf32_buffer); } +size_t simdutf__convert_latin1_to_utf8(const char* input, size_t length, char* utf8_buffer) +{ + return simdutf::convert_latin1_to_utf8(input, length, utf8_buffer); +} void simdutf__change_endianness_utf16(const char16_t* 
buf, size_t length, char16_t* output) { @@ -325,6 +329,11 @@ size_t simdutf__utf32_length_from_utf8(const char* input, size_t length) return simdutf::utf32_length_from_utf8(input, length); } +size_t simdutf__utf8_length_from_latin1(const char* input, size_t length) +{ + return simdutf::utf8_length_from_latin1(input, length); +} + size_t simdutf__base64_encode(const char* input, size_t length, char* output, int is_urlsafe) { return simdutf::binary_to_base64(input, length, output, is_urlsafe ? simdutf::base64_url : simdutf::base64_default); diff --git a/src/bun.js/bindings/bun-simdutf.zig b/src/bun.js/bindings/bun-simdutf.zig index 98f426cd76b7be..7ef948dfc22c1f 100644 --- a/src/bun.js/bindings/bun-simdutf.zig +++ b/src/bun.js/bindings/bun-simdutf.zig @@ -81,6 +81,7 @@ pub extern fn simdutf__convert_utf16le_to_utf32_with_errors(buf: [*]const u16, l pub extern fn simdutf__convert_utf16be_to_utf32_with_errors(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) SIMDUTFResult; pub extern fn simdutf__convert_valid_utf16le_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize; pub extern fn simdutf__convert_valid_utf16be_to_utf32(buf: [*]const u16, len: usize, utf32_buffer: [*]u32) usize; +pub extern fn simdutf__convert_latin1_to_utf8(buf: [*]const u8, len: usize, utf8_buffer: [*]u8) usize; pub extern fn simdutf__change_endianness_utf16(buf: [*]const u16, length: usize, output: [*]u16) void; pub extern fn simdutf__count_utf16le(buf: [*]const u16, length: usize) usize; pub extern fn simdutf__count_utf16be(buf: [*]const u16, length: usize) usize; @@ -93,6 +94,7 @@ pub extern fn simdutf__utf16_length_from_utf8(input: [*]const u8, length: usize) pub extern fn simdutf__utf8_length_from_utf32(input: [*c]const c_uint, length: usize) usize; pub extern fn simdutf__utf16_length_from_utf32(input: [*c]const c_uint, length: usize) usize; pub extern fn simdutf__utf32_length_from_utf8(input: [*]const u8, length: usize) usize; +pub extern fn 
simdutf__utf8_length_from_latin1(input: [*]const u8, length: usize) usize; pub const validate = struct { pub const with_errors = struct { @@ -126,6 +128,14 @@ pub const validate = struct { }; pub const convert = struct { + pub const latin1 = struct { + pub const to = struct { + pub fn utf8(input: []const u8, output: []u8) usize { + return simdutf__convert_latin1_to_utf8(input.ptr, input.len, output.ptr); + } + }; + }; + pub const utf8 = struct { pub const to = struct { pub const utf16 = struct { @@ -261,6 +271,10 @@ pub const length = struct { } }; + pub fn latin1(input: []const u8) usize { + return simdutf__utf8_length_from_latin1(input.ptr, input.len); + } + pub fn utf32(input: []const u32) usize { JSC.markBinding(@src()); return simdutf__utf8_length_from_utf32(input.ptr, input.len); diff --git a/src/bun.js/bindings/generated_classes_list.zig b/src/bun.js/bindings/generated_classes_list.zig index 910b6d0e04d7f0..a18896fbb409b1 100644 --- a/src/bun.js/bindings/generated_classes_list.zig +++ b/src/bun.js/bindings/generated_classes_list.zig @@ -75,4 +75,5 @@ pub const Classes = struct { pub const PostgresSQLQuery = JSC.Postgres.PostgresSQLQuery; pub const BrotliEncoder = JSC.API.BrotliEncoder; pub const BrotliDecoder = JSC.API.BrotliDecoder; + pub const TextEncoderStreamEncoder = JSC.WebCore.TextEncoderStreamEncoder; }; diff --git a/src/bun.js/bindings/napi.cpp b/src/bun.js/bindings/napi.cpp index eb297c93745a68..49344b6607259d 100644 --- a/src/bun.js/bindings/napi.cpp +++ b/src/bun.js/bindings/napi.cpp @@ -62,6 +62,7 @@ #include #include #include "CommonJSModuleRecord.h" +#include "wtf/text/ASCIIFastPath.h" // #include using namespace JSC; @@ -723,12 +724,15 @@ extern "C" napi_status napi_create_arraybuffer(napi_env env, // it doesn't copy the string // but it's only safe to use if we are not setting a property // because we can't guarantee the lifetime of it -#define PROPERTY_NAME_FROM_UTF8(identifierName) \ - size_t utf8Len = strlen(utf8name); \ - 
JSC::PropertyName identifierName = LIKELY(charactersAreAllASCII(std::span { reinterpret_cast(utf8name), utf8Len })) ? JSC::PropertyName(JSC::Identifier::fromString(vm, WTF::String(WTF::StringImpl::createWithoutCopying({ utf8name, utf8Len })))) : JSC::PropertyName(JSC::Identifier::fromString(vm, WTF::String::fromUTF8(utf8name))); +#define PROPERTY_NAME_FROM_UTF8(identifierName) \ + size_t utf8Len = strlen(utf8Name); \ + WTF::String nameString = LIKELY(WTF::charactersAreAllASCII(std::span { reinterpret_cast(utf8Name), utf8Len })) \ + ? WTF::String(WTF::StringImpl::createWithoutCopying({ utf8Name, utf8Len })) \ + : WTF::String::fromUTF8(utf8Name); \ + JSC::PropertyName identifierName = JSC::Identifier::fromString(vm, nameString); extern "C" napi_status napi_has_named_property(napi_env env, napi_value object, - const char* utf8name, + const char* utf8Name, bool* result) { NAPI_PREMABLE @@ -740,7 +744,7 @@ extern "C" napi_status napi_has_named_property(napi_env env, napi_value object, auto globalObject = toJS(env); auto& vm = globalObject->vm(); - auto* target = toJS(object).getObject(); + JSObject* target = toJS(object).getObject(); if (UNLIKELY(!target)) { return napi_object_expected; } @@ -748,14 +752,15 @@ extern "C" napi_status napi_has_named_property(napi_env env, napi_value object, PROPERTY_NAME_FROM_UTF8(name); auto scope = DECLARE_CATCH_SCOPE(vm); - *result = !!target->getIfPropertyExists(globalObject, name); + PropertySlot slot(target, PropertySlot::InternalMethodType::HasProperty); + *result = target->getPropertySlot(globalObject, name, slot); RETURN_IF_EXCEPTION(scope, napi_generic_failure); scope.clearException(); return napi_ok; } extern "C" napi_status napi_get_named_property(napi_env env, napi_value object, - const char* utf8name, + const char* utf8Name, napi_value* result) { NAPI_PREMABLE @@ -767,7 +772,7 @@ extern "C" napi_status napi_get_named_property(napi_env env, napi_value object, auto globalObject = toJS(env); auto& vm = globalObject->vm(); - 
auto* target = toJS(object).getObject(); + JSObject* target = toJS(object).getObject(); if (UNLIKELY(!target)) { return napi_object_expected; } diff --git a/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h b/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h index f6708c75b7d019..f4846abe1ad394 100644 --- a/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h +++ b/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h @@ -476,10 +476,10 @@ class DOMClientIsoSubspaces { // std::unique_ptr m_clientSubspaceForStaticRange; // std::unique_ptr m_clientSubspaceForText; // std::unique_ptr m_clientSubspaceForTextDecoder; - // std::unique_ptr m_clientSubspaceForTextDecoderStream; + std::unique_ptr m_clientSubspaceForTextDecoderStream; // std::unique_ptr m_clientSubspaceForTextDecoderStreamDecoder; std::unique_ptr m_clientSubspaceForTextEncoder; - // std::unique_ptr m_clientSubspaceForTextEncoderStream; + std::unique_ptr m_clientSubspaceForTextEncoderStream; // std::unique_ptr m_clientSubspaceForTextEncoderStreamEncoder; // std::unique_ptr m_clientSubspaceForTextEvent; // std::unique_ptr m_clientSubspaceForTransitionEvent; diff --git a/src/bun.js/bindings/webcore/DOMIsoSubspaces.h b/src/bun.js/bindings/webcore/DOMIsoSubspaces.h index e6450bb15478e0..e4889d43844227 100644 --- a/src/bun.js/bindings/webcore/DOMIsoSubspaces.h +++ b/src/bun.js/bindings/webcore/DOMIsoSubspaces.h @@ -469,10 +469,10 @@ class DOMIsoSubspaces { // std::unique_ptr m_subspaceForStaticRange; // std::unique_ptr m_subspaceForText; // std::unique_ptr m_subspaceForTextDecoder; - // std::unique_ptr m_subspaceForTextDecoderStream; + std::unique_ptr m_subspaceForTextDecoderStream; // std::unique_ptr m_subspaceForTextDecoderStreamDecoder; std::unique_ptr m_subspaceForTextEncoder; - // std::unique_ptr m_subspaceForTextEncoderStream; + std::unique_ptr m_subspaceForTextEncoderStream; // std::unique_ptr m_subspaceForTextEncoderStreamEncoder; // std::unique_ptr m_subspaceForTextEvent; // std::unique_ptr 
m_subspaceForTransitionEvent; diff --git a/src/bun.js/bindings/webcore/JSTextDecoderStream.cpp b/src/bun.js/bindings/webcore/JSTextDecoderStream.cpp new file mode 100644 index 00000000000000..b2ab335931ca0a --- /dev/null +++ b/src/bun.js/bindings/webcore/JSTextDecoderStream.cpp @@ -0,0 +1,170 @@ +/* + This file is part of the WebKit open source project. + This file has been generated by generate-bindings.pl. DO NOT MODIFY! + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. 
+*/ + +#include "config.h" +#include "JSTextDecoderStream.h" + +#include "ExtendedDOMClientIsoSubspaces.h" +#include "ExtendedDOMIsoSubspaces.h" +#include "JSDOMAttribute.h" +#include "JSDOMBinding.h" +#include "JSDOMBuiltinConstructor.h" +#include "JSDOMExceptionHandling.h" +#include "JSDOMGlobalObjectInlines.h" +#include "JSDOMWrapperCache.h" +// #include "TextDecoderStreamBuiltins.h" +#include "WebCoreJSClientData.h" +#include +#include +#include +#include +#include +#include +#include + +namespace WebCore { +using namespace JSC; + +// Attributes + +static JSC_DECLARE_CUSTOM_GETTER(jsTextDecoderStreamConstructor); + +class JSTextDecoderStreamPrototype final : public JSC::JSNonFinalObject { +public: + using Base = JSC::JSNonFinalObject; + static JSTextDecoderStreamPrototype* create(JSC::VM& vm, JSDOMGlobalObject* globalObject, JSC::Structure* structure) + { + JSTextDecoderStreamPrototype* ptr = new (NotNull, JSC::allocateCell(vm)) JSTextDecoderStreamPrototype(vm, globalObject, structure); + ptr->finishCreation(vm); + return ptr; + } + + DECLARE_INFO; + template + static JSC::GCClient::IsoSubspace* subspaceFor(JSC::VM& vm) + { + STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(JSTextDecoderStreamPrototype, Base); + return &vm.plainObjectSpace(); + } + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) + { + return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(JSC::ObjectType, StructureFlags), info()); + } + +private: + JSTextDecoderStreamPrototype(JSC::VM& vm, JSC::JSGlobalObject*, JSC::Structure* structure) + : JSC::JSNonFinalObject(vm, structure) + { + } + + void finishCreation(JSC::VM&); +}; +STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(JSTextDecoderStreamPrototype, JSTextDecoderStreamPrototype::Base); + +using JSTextDecoderStreamDOMConstructor = JSDOMBuiltinConstructor; + +template<> const ClassInfo JSTextDecoderStreamDOMConstructor::s_info = { "TextDecoderStream"_s, &Base::s_info, nullptr, nullptr, 
CREATE_METHOD_TABLE(JSTextDecoderStreamDOMConstructor) }; + +template<> JSValue JSTextDecoderStreamDOMConstructor::prototypeForStructure(JSC::VM& vm, const JSDOMGlobalObject& globalObject) +{ + UNUSED_PARAM(vm); + return globalObject.functionPrototype(); +} + +template<> void JSTextDecoderStreamDOMConstructor::initializeProperties(VM& vm, JSDOMGlobalObject& globalObject) +{ + putDirect(vm, vm.propertyNames->length, jsNumber(0), JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum); + JSString* nameString = jsNontrivialString(vm, "TextDecoderStream"_s); + m_originalName.set(vm, this, nameString); + putDirect(vm, vm.propertyNames->name, nameString, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum); + putDirect(vm, vm.propertyNames->prototype, JSTextDecoderStream::prototype(vm, globalObject), JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum | JSC::PropertyAttribute::DontDelete); +} + +template<> FunctionExecutable* JSTextDecoderStreamDOMConstructor::initializeExecutable(VM& vm) +{ + return textDecoderStreamInitializeTextDecoderStreamCodeGenerator(vm); +} + +/* Hash table for prototype */ + +static const HashTableValue JSTextDecoderStreamPrototypeTableValues[] = { + { "constructor"_s, static_cast(PropertyAttribute::DontEnum), NoIntrinsic, { HashTableValue::GetterSetterType, jsTextDecoderStreamConstructor, 0 } }, + { "encoding"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textDecoderStreamEncodingCodeGenerator, 0 } }, + { "fatal"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textDecoderStreamFatalCodeGenerator, 0 } }, + { "ignoreBOM"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, 
textDecoderStreamIgnoreBOMCodeGenerator, 0 } }, + { "readable"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textDecoderStreamReadableCodeGenerator, 0 } }, + { "writable"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textDecoderStreamWritableCodeGenerator, 0 } }, +}; + +const ClassInfo JSTextDecoderStreamPrototype::s_info = { "TextDecoderStream"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextDecoderStreamPrototype) }; + +void JSTextDecoderStreamPrototype::finishCreation(VM& vm) +{ + Base::finishCreation(vm); + reifyStaticProperties(vm, JSTextDecoderStream::info(), JSTextDecoderStreamPrototypeTableValues, *this); + JSC_TO_STRING_TAG_WITHOUT_TRANSITION(); +} + +const ClassInfo JSTextDecoderStream::s_info = { "TextDecoderStream"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextDecoderStream) }; + +JSTextDecoderStream::JSTextDecoderStream(Structure* structure, JSDOMGlobalObject& globalObject) + : JSDOMObject(structure, globalObject) +{ +} + +JSObject* JSTextDecoderStream::createPrototype(VM& vm, JSDOMGlobalObject& globalObject) +{ + auto* structure = JSTextDecoderStreamPrototype::createStructure(vm, &globalObject, globalObject.objectPrototype()); + structure->setMayBePrototype(true); + return JSTextDecoderStreamPrototype::create(vm, &globalObject, structure); +} + +JSObject* JSTextDecoderStream::prototype(VM& vm, JSDOMGlobalObject& globalObject) +{ + return getDOMPrototype(vm, globalObject); +} + +JSValue JSTextDecoderStream::getConstructor(VM& vm, const JSGlobalObject* globalObject) +{ + return getDOMConstructor(vm, *jsCast(globalObject)); +} + +void JSTextDecoderStream::destroy(JSC::JSCell* cell) +{ + JSTextDecoderStream* thisObject = static_cast(cell); + thisObject->JSTextDecoderStream::~JSTextDecoderStream(); +} + 
+JSC_DEFINE_CUSTOM_GETTER(jsTextDecoderStreamConstructor, (JSGlobalObject * lexicalGlobalObject, EncodedJSValue thisValue, PropertyName)) +{ + auto& vm = JSC::getVM(lexicalGlobalObject); + auto throwScope = DECLARE_THROW_SCOPE(vm); + auto* prototype = jsDynamicCast(JSValue::decode(thisValue)); + if (UNLIKELY(!prototype)) + return throwVMTypeError(lexicalGlobalObject, throwScope); + return JSValue::encode(JSTextDecoderStream::getConstructor(vm, prototype->globalObject())); +} + +JSC::GCClient::IsoSubspace* JSTextDecoderStream::subspaceForImpl(JSC::VM& vm) +{ + return WebCore::subspaceForImpl(vm, [](auto& spaces) { return spaces.m_clientSubspaceForTextDecoderStream.get(); }, [](auto& spaces, auto&& space) { spaces.m_clientSubspaceForTextDecoderStream = std::forward(space); }, [](auto& spaces) { return spaces.m_subspaceForTextDecoderStream.get(); }, [](auto& spaces, auto&& space) { spaces.m_subspaceForTextDecoderStream = std::forward(space); }); +} + +} diff --git a/src/bun.js/bindings/webcore/JSTextDecoderStream.h b/src/bun.js/bindings/webcore/JSTextDecoderStream.h new file mode 100644 index 00000000000000..eecb62bdc8a276 --- /dev/null +++ b/src/bun.js/bindings/webcore/JSTextDecoderStream.h @@ -0,0 +1,64 @@ +/* + This file is part of the WebKit open source project. + This file has been generated by generate-bindings.pl. DO NOT MODIFY! + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. 
If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#pragma once + +#include "JSDOMWrapper.h" + +namespace WebCore { + +class JSTextDecoderStream : public JSDOMObject { +public: + using Base = JSDOMObject; + static JSTextDecoderStream* create(JSC::Structure* structure, JSDOMGlobalObject* globalObject) + { + auto& vm = globalObject->vm(); + JSTextDecoderStream* ptr = new (NotNull, JSC::allocateCell(vm)) JSTextDecoderStream(structure, *globalObject); + ptr->finishCreation(vm); + return ptr; + } + + static JSC::JSObject* createPrototype(JSC::VM&, JSDOMGlobalObject&); + static JSC::JSObject* prototype(JSC::VM&, JSDOMGlobalObject&); + static void destroy(JSC::JSCell*); + + DECLARE_INFO; + + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) + { + return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(JSC::ObjectType, StructureFlags), info(), JSC::NonArray); + } + + static JSC::JSValue getConstructor(JSC::VM&, const JSC::JSGlobalObject*); + template static JSC::GCClient::IsoSubspace* subspaceFor(JSC::VM& vm) + { + if constexpr (mode == JSC::SubspaceAccess::Concurrently) + return nullptr; + return subspaceForImpl(vm); + } + static JSC::GCClient::IsoSubspace* subspaceForImpl(JSC::VM& vm); + +protected: + JSTextDecoderStream(JSC::Structure*, JSDOMGlobalObject&); + + DECLARE_DEFAULT_FINISH_CREATION; +}; + +} // namespace WebCore diff --git a/src/bun.js/bindings/webcore/JSTextEncoderStream.cpp b/src/bun.js/bindings/webcore/JSTextEncoderStream.cpp new file mode 100644 index 00000000000000..319f7d5c418b71 --- /dev/null +++ b/src/bun.js/bindings/webcore/JSTextEncoderStream.cpp @@ -0,0 +1,168 @@ +/* + This file is part of the WebKit open source project. + This file has been generated by generate-bindings.pl. DO NOT MODIFY! 
+ + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#include "config.h" +#include "JSTextEncoderStream.h" + +#include "ExtendedDOMClientIsoSubspaces.h" +#include "ExtendedDOMIsoSubspaces.h" +#include "JSDOMAttribute.h" +#include "JSDOMBinding.h" +#include "JSDOMBuiltinConstructor.h" +#include "JSDOMExceptionHandling.h" +#include "JSDOMGlobalObjectInlines.h" +#include "JSDOMWrapperCache.h" +// #include "TextEncoderStreamBuiltins.h" +#include "WebCoreJSClientData.h" +#include +#include +#include +#include +#include +#include +#include + +namespace WebCore { +using namespace JSC; + +// Attributes + +static JSC_DECLARE_CUSTOM_GETTER(jsTextEncoderStreamConstructor); + +class JSTextEncoderStreamPrototype final : public JSC::JSNonFinalObject { +public: + using Base = JSC::JSNonFinalObject; + static JSTextEncoderStreamPrototype* create(JSC::VM& vm, JSDOMGlobalObject* globalObject, JSC::Structure* structure) + { + JSTextEncoderStreamPrototype* ptr = new (NotNull, JSC::allocateCell(vm)) JSTextEncoderStreamPrototype(vm, globalObject, structure); + ptr->finishCreation(vm); + return ptr; + } + + DECLARE_INFO; + template + static JSC::GCClient::IsoSubspace* subspaceFor(JSC::VM& vm) + { + STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(JSTextEncoderStreamPrototype, Base); + return 
&vm.plainObjectSpace(); + } + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) + { + return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(JSC::ObjectType, StructureFlags), info()); + } + +private: + JSTextEncoderStreamPrototype(JSC::VM& vm, JSC::JSGlobalObject*, JSC::Structure* structure) + : JSC::JSNonFinalObject(vm, structure) + { + } + + void finishCreation(JSC::VM&); +}; +STATIC_ASSERT_ISO_SUBSPACE_SHARABLE(JSTextEncoderStreamPrototype, JSTextEncoderStreamPrototype::Base); + +using JSTextEncoderStreamDOMConstructor = JSDOMBuiltinConstructor; + +template<> const ClassInfo JSTextEncoderStreamDOMConstructor::s_info = { "TextEncoderStream"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextEncoderStreamDOMConstructor) }; + +template<> JSValue JSTextEncoderStreamDOMConstructor::prototypeForStructure(JSC::VM& vm, const JSDOMGlobalObject& globalObject) +{ + UNUSED_PARAM(vm); + return globalObject.functionPrototype(); +} + +template<> void JSTextEncoderStreamDOMConstructor::initializeProperties(VM& vm, JSDOMGlobalObject& globalObject) +{ + putDirect(vm, vm.propertyNames->length, jsNumber(0), JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum); + JSString* nameString = jsNontrivialString(vm, "TextEncoderStream"_s); + m_originalName.set(vm, this, nameString); + putDirect(vm, vm.propertyNames->name, nameString, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum); + putDirect(vm, vm.propertyNames->prototype, JSTextEncoderStream::prototype(vm, globalObject), JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::DontEnum | JSC::PropertyAttribute::DontDelete); +} + +template<> FunctionExecutable* JSTextEncoderStreamDOMConstructor::initializeExecutable(VM& vm) +{ + return textEncoderStreamInitializeTextEncoderStreamCodeGenerator(vm); +} + +/* Hash table for prototype */ + +static const HashTableValue JSTextEncoderStreamPrototypeTableValues[] = { + 
{ "constructor"_s, static_cast(PropertyAttribute::DontEnum), NoIntrinsic, { HashTableValue::GetterSetterType, jsTextEncoderStreamConstructor, 0 } }, + { "encoding"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textEncoderStreamEncodingCodeGenerator, 0 } }, + { "readable"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textEncoderStreamReadableCodeGenerator, 0 } }, + { "writable"_s, JSC::PropertyAttribute::ReadOnly | JSC::PropertyAttribute::Accessor | JSC::PropertyAttribute::Builtin, NoIntrinsic, { HashTableValue::BuiltinAccessorType, textEncoderStreamWritableCodeGenerator, 0 } }, +}; + +const ClassInfo JSTextEncoderStreamPrototype::s_info = { "TextEncoderStream"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextEncoderStreamPrototype) }; + +void JSTextEncoderStreamPrototype::finishCreation(VM& vm) +{ + Base::finishCreation(vm); + reifyStaticProperties(vm, JSTextEncoderStream::info(), JSTextEncoderStreamPrototypeTableValues, *this); + JSC_TO_STRING_TAG_WITHOUT_TRANSITION(); +} + +const ClassInfo JSTextEncoderStream::s_info = { "TextEncoderStream"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSTextEncoderStream) }; + +JSTextEncoderStream::JSTextEncoderStream(Structure* structure, JSDOMGlobalObject& globalObject) + : JSDOMObject(structure, globalObject) +{ +} + +JSObject* JSTextEncoderStream::createPrototype(VM& vm, JSDOMGlobalObject& globalObject) +{ + auto* structure = JSTextEncoderStreamPrototype::createStructure(vm, &globalObject, globalObject.objectPrototype()); + structure->setMayBePrototype(true); + return JSTextEncoderStreamPrototype::create(vm, &globalObject, structure); +} + +JSObject* JSTextEncoderStream::prototype(VM& vm, JSDOMGlobalObject& globalObject) +{ + return getDOMPrototype(vm, globalObject); +} + +JSValue 
JSTextEncoderStream::getConstructor(VM& vm, const JSGlobalObject* globalObject) +{ + return getDOMConstructor(vm, *jsCast(globalObject)); +} + +void JSTextEncoderStream::destroy(JSC::JSCell* cell) +{ + JSTextEncoderStream* thisObject = static_cast(cell); + thisObject->JSTextEncoderStream::~JSTextEncoderStream(); +} + +JSC_DEFINE_CUSTOM_GETTER(jsTextEncoderStreamConstructor, (JSGlobalObject * lexicalGlobalObject, EncodedJSValue thisValue, PropertyName)) +{ + auto& vm = JSC::getVM(lexicalGlobalObject); + auto throwScope = DECLARE_THROW_SCOPE(vm); + auto* prototype = jsDynamicCast(JSValue::decode(thisValue)); + if (UNLIKELY(!prototype)) + return throwVMTypeError(lexicalGlobalObject, throwScope); + return JSValue::encode(JSTextEncoderStream::getConstructor(vm, prototype->globalObject())); +} + +JSC::GCClient::IsoSubspace* JSTextEncoderStream::subspaceForImpl(JSC::VM& vm) +{ + return WebCore::subspaceForImpl(vm, [](auto& spaces) { return spaces.m_clientSubspaceForTextEncoderStream.get(); }, [](auto& spaces, auto&& space) { spaces.m_clientSubspaceForTextEncoderStream = std::forward(space); }, [](auto& spaces) { return spaces.m_subspaceForTextEncoderStream.get(); }, [](auto& spaces, auto&& space) { spaces.m_subspaceForTextEncoderStream = std::forward(space); }); +} + +} diff --git a/src/bun.js/bindings/webcore/JSTextEncoderStream.h b/src/bun.js/bindings/webcore/JSTextEncoderStream.h new file mode 100644 index 00000000000000..4db3bbbbd022b9 --- /dev/null +++ b/src/bun.js/bindings/webcore/JSTextEncoderStream.h @@ -0,0 +1,64 @@ +/* + This file is part of the WebKit open source project. + This file has been generated by generate-bindings.pl. DO NOT MODIFY! + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public License + along with this library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. +*/ + +#pragma once + +#include "JSDOMWrapper.h" + +namespace WebCore { + +class JSTextEncoderStream : public JSDOMObject { +public: + using Base = JSDOMObject; + static JSTextEncoderStream* create(JSC::Structure* structure, JSDOMGlobalObject* globalObject) + { + auto& vm = globalObject->vm(); + JSTextEncoderStream* ptr = new (NotNull, JSC::allocateCell(vm)) JSTextEncoderStream(structure, *globalObject); + ptr->finishCreation(vm); + return ptr; + } + + static JSC::JSObject* createPrototype(JSC::VM&, JSDOMGlobalObject&); + static JSC::JSObject* prototype(JSC::VM&, JSDOMGlobalObject&); + static void destroy(JSC::JSCell*); + + DECLARE_INFO; + + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) + { + return JSC::Structure::create(vm, globalObject, prototype, JSC::TypeInfo(JSC::ObjectType, StructureFlags), info(), JSC::NonArray); + } + + static JSC::JSValue getConstructor(JSC::VM&, const JSC::JSGlobalObject*); + template static JSC::GCClient::IsoSubspace* subspaceFor(JSC::VM& vm) + { + if constexpr (mode == JSC::SubspaceAccess::Concurrently) + return nullptr; + return subspaceForImpl(vm); + } + static JSC::GCClient::IsoSubspace* subspaceForImpl(JSC::VM& vm); + +protected: + JSTextEncoderStream(JSC::Structure*, JSDOMGlobalObject&); + + DECLARE_DEFAULT_FINISH_CREATION; +}; + +} // namespace WebCore diff --git a/src/bun.js/webcore/blob.zig b/src/bun.js/webcore/blob.zig index 0f4ef6c3a240cc..4ae9cede089ccf 100644 --- 
a/src/bun.js/webcore/blob.zig +++ b/src/bun.js/webcore/blob.zig @@ -4249,7 +4249,7 @@ pub const Blob = struct { // if toUTF16Alloc returns null, it means there are no non-ASCII characters if (strings.toUTF16Alloc(allocator, buf, false, false) catch null) |external| { if (comptime lifetime != .temporary) this.setIsASCIIFlag(false); - const result = ZigString.init16(external).toJSONObject(global); + const result = ZigString.initUTF16(external).toJSONObject(global); allocator.free(external); return result; } diff --git a/src/bun.js/webcore/encoding.classes.ts b/src/bun.js/webcore/encoding.classes.ts index 7fd70406f4dd86..7b5114ebd815ce 100644 --- a/src/bun.js/webcore/encoding.classes.ts +++ b/src/bun.js/webcore/encoding.classes.ts @@ -31,4 +31,27 @@ export default [ }, }, }), + define({ + name: "TextEncoderStreamEncoder", + construct: true, + finalize: true, + JSType: "0b11101110", + configurable: false, + klass: {}, + proto: { + encode: { + fn: "encode", + length: 1, + + DOMJIT: { + returns: "JSUint8Array", + args: ["JSString"], + }, + }, + flush: { + fn: "flush", + length: 0, + }, + }, + }), ]; diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 588b22439218c6..77e281686525a8 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -15,7 +15,6 @@ const Output = bun.Output; const MutableString = bun.MutableString; const strings = bun.strings; const string = bun.string; -const default_allocator = bun.default_allocator; const FeatureFlags = bun.FeatureFlags; const ArrayBuffer = @import("../base.zig").ArrayBuffer; const Properties = @import("../base.zig").Properties; @@ -38,8 +37,6 @@ const Task = @import("../javascript.zig").Task; const picohttp = bun.picohttp; pub const TextEncoder = struct { - filler: u32 = 0, - pub export fn TextEncoder__encode8( globalThis: *JSGlobalObject, ptr: [*]const u8, @@ -110,7 +107,7 @@ pub const TextEncoder = struct { return uint8array; } else { const bytes = 
strings.toUTF8AllocWithType( - default_allocator, + bun.default_allocator, @TypeOf(slice), slice, ) catch { @@ -180,7 +177,7 @@ pub const TextEncoder = struct { var stack_buf: [2048]u8 = undefined; var buf_to_use: []u8 = &stack_buf; const length = rope_str.length(); - var array: JSValue = JSValue.zero; + var array: JSValue = .zero; if (length > stack_buf.len / 2) { array = JSC.JSValue.createUninitializedUint8Array(globalThis, length); array.ensureStillAlive(); @@ -224,7 +221,7 @@ pub const TextEncoder = struct { result.written = 3; } const sized: [2]u32 = .{ result.read, result.written }; - return @as(u64, @bitCast(sized)); + return @bitCast(sized); } pub export fn TextEncoder__encodeInto8( @@ -238,7 +235,7 @@ pub const TextEncoder = struct { const result: strings.EncodeIntoResult = strings.copyLatin1IntoUTF8(output, []const u8, input); const sized: [2]u32 = .{ result.read, result.written }; - return @as(u64, @bitCast(sized)); + return @bitCast(sized); } }; @@ -370,6 +367,9 @@ pub const EncodingLabel = enum { Eight.case("utf-16le"), => EncodingLabel.@"UTF-16LE", + Eight.case("utf-16be"), + => EncodingLabel.@"UTF-16BE", + Eight.case("utf8"), Eight.case("utf-8") => EncodingLabel.@"UTF-8", else => null, }, @@ -405,14 +405,270 @@ pub const EncodingLabel = enum { } }; +pub const TextEncoderStreamEncoder = struct { + pending_lead_surrogate: ?u16 = null, + + const log = Output.scoped(.TextEncoderStreamEncoder, false); + + pub usingnamespace JSC.Codegen.JSTextEncoderStreamEncoder; + pub usingnamespace bun.New(TextEncoderStreamEncoder); + + pub fn finalize(this: *TextEncoderStreamEncoder) void { + this.destroy(); + } + + pub fn constructor(_: *JSGlobalObject, _: *JSC.CallFrame) ?*TextEncoderStreamEncoder { + return TextEncoderStreamEncoder.new(.{}); + } + + pub fn encode(this: *TextEncoderStreamEncoder, globalObject: *JSC.JSGlobalObject, callFrame: *JSC.CallFrame) JSValue { + const arguments = callFrame.arguments(1).slice(); + if (arguments.len == 0) { + 
globalObject.throwNotEnoughArguments("TextEncoderStreamEncoder.encode", 1, arguments.len); + return .zero; + } + + const str: ZigString = (arguments[0].toStringOrNull(globalObject) orelse return .zero).getZigString(globalObject); + + if (str.is16Bit()) { + return this.encodeUTF16(globalObject, str.utf16SliceAligned()); + } + + return this.encodeLatin1(globalObject, str.slice()); + } + + pub fn encodeWithoutTypeChecks(this: *TextEncoderStreamEncoder, globalObject: *JSC.JSGlobalObject, input: *JSC.JSString) JSValue { + const str = input.getZigString(globalObject); + + if (str.is16Bit()) { + return this.encodeUTF16(globalObject, str.utf16SliceAligned()); + } + + return this.encodeLatin1(globalObject, str.slice()); + } + + fn encodeLatin1(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, input: []const u8) JSValue { + log("encodeLatin1: \"{s}\"", .{input}); + + if (input.len == 0) return .undefined; + + const prepend_replacement = prepend_replacement: { + if (this.pending_lead_surrogate != null) { + this.pending_lead_surrogate = null; + // no latin1 surrogate pairs + break :prepend_replacement true; + } + + break :prepend_replacement false; + }; + + const length: usize = bun.simdutf.length.utf8.from.latin1(input) + @as(usize, if (prepend_replacement) 3 else 0); + + const array_value, const bytes = ArrayBuffer.allocBuffer(globalObject, length); + + var remain = bytes; + + if (prepend_replacement) { + @memcpy(remain[0..3], &[3]u8{ 0xef, 0xbf, 0xbd }); + remain = remain[3..]; + } + + const count = bun.simdutf.convert.latin1.to.utf8(input, remain); + + bun.debugAssert(count == remain.len); + + return array_value; + } + + fn encodeUTF16(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, input: []const u16) JSValue { + log("encodeUTF16: \"{}\"", .{bun.fmt.utf16(input)}); + + if (input.len == 0) return .undefined; + + const Prepend = struct { + bytes: [4]u8, + len: u3, + + pub const replacement: @This() = .{ .bytes = .{ 0xef, 0xbf, 0xbd, 0 }, .len = 
3 }; + + pub fn fromSequence(seq: [4]u8, length: u3) @This() { + return .{ .bytes = seq, .len = length }; + } + }; + + var remain = input; + + const prepend: ?Prepend = prepend: { + if (this.pending_lead_surrogate) |lead| { + this.pending_lead_surrogate = null; + const maybe_trail = remain[0]; + if (strings.u16IsTrail(maybe_trail)) { + const converted = strings.utf16CodepointWithFFFD([]const u16, &.{ lead, maybe_trail }); + // shouldn't fail because `u16IsTrail` is true and `pending_lead_surrogate` is always + // a valid lead. + bun.debugAssert(!converted.fail); + + const sequence = strings.wtf8Sequence(converted.code_point); + + remain = remain[1..]; + if (remain.len == 0) { + return ArrayBuffer.createBuffer( + globalObject, + sequence[0..converted.utf8Width()], + ); + } + + break :prepend Prepend.fromSequence(sequence, converted.utf8Width()); + } + + break :prepend Prepend.replacement; + } + break :prepend null; + }; + + // TODO: use ExternalArrayBuffer and skip validation pass + const validate_result = bun.simdutf.validate.with_errors.utf16le(remain); + if (validate_result.status == .success) { + const len = bun.simdutf.length.utf8.from.utf16.le(remain); + if (len == 0) return .undefined; + + const array_value, var bytes = ArrayBuffer.allocBuffer(globalObject, len + if (prepend) |pre| pre.len else 0); + if (array_value.isEmpty()) { + return .zero; + } + + if (prepend) |pre| { + @memcpy(bytes[0..pre.len], pre.bytes[0..pre.len]); + bytes = bytes[pre.len..]; + } + + const convert_result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(remain, bytes); + bun.debugAssert(convert_result.status == .success); + + return array_value; + } + + var buf = std.ArrayList(u8).initCapacity( + bun.default_allocator, + validate_result.count + if (prepend) |pre| pre.len else 0, + ) catch bun.outOfMemory(); + defer buf.deinit(); + + if (prepend) |pre| { + buf.appendSliceAssumeCapacity(pre.bytes[0..pre.len]); + } + + var lead_surrogate: ?u16 = null; + + while 
(strings.firstNonASCII16([]const u16, remain)) |non_ascii| { + const token = remain[non_ascii]; + const ascii_slice = remain[0..non_ascii]; + remain = remain[non_ascii + 1 ..]; + + if (lead_surrogate) |lead| { + lead_surrogate = null; + + if (ascii_slice.len != 0) { + // - +3 for replacement character + // - it's ascii, length will be the same, just need to convert u16 -> u8 + buf.ensureUnusedCapacity(ascii_slice.len + 3) catch bun.outOfMemory(); + buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory(); + strings.convertUTF16ToUTF8Append(&buf, ascii_slice) catch bun.outOfMemory(); + + continue; + } + + if (strings.u16IsTrail(token)) { + const converted = strings.utf16CodepointWithFFFD([]const u16, &.{ lead, token }); + bun.debugAssert(!converted.fail); + + const sequence = strings.wtf8Sequence(converted.code_point); + + buf.appendSlice(sequence[0..converted.utf8Width()]) catch bun.outOfMemory(); + continue; + } + + buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory(); + } + + if (strings.u16IsLead(token)) { + if (remain.len == 0) { + this.pending_lead_surrogate = token; + if (buf.items.len == 0) return .undefined; + return ArrayBuffer.createBuffer(globalObject, buf.items); + } + + lead_surrogate = token; + continue; + } + + bun.debugAssert(strings.u16IsTrail(token)); + + buf.appendSlice(&.{ 0xef, 0xbf, 0xbd }) catch bun.outOfMemory(); + } + + if (lead_surrogate != null and remain.len == 0) { + this.pending_lead_surrogate = lead_surrogate; + if (buf.items.len == 0) return .undefined; + return ArrayBuffer.createBuffer(globalObject, buf.items); + } + + const array_value, var bytes = ArrayBuffer.allocBuffer(globalObject, buf.items.len + remain.len + @as(usize, if (lead_surrogate != null) 3 else 0)); + if (array_value.isEmpty()) return .zero; + + @memcpy(bytes[0..buf.items.len], buf.items); + bytes = bytes[buf.items.len..]; + + if (lead_surrogate != null) { + @memcpy(bytes[0..3], &[3]u8{ 0xef, 0xbf, 0xbd }); + bytes = bytes[3..]; + } + + _ = 
strings.convertUTF16toUTF8InBuffer(bytes, remain) catch unreachable; + + return array_value; + } + + pub fn flush(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, _: *JSC.CallFrame) JSValue { + return flushBody(this, globalObject); + } + + pub fn flushWithoutTypeChecks(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject) JSValue { + return flushBody(this, globalObject); + } + + fn flushBody(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject) JSValue { + return if (this.pending_lead_surrogate == null) + .undefined + else + JSC.ArrayBuffer.createBuffer(globalObject, &.{ 0xef, 0xbf, 0xbd }); + } +}; + pub const TextDecoder = struct { - scratch_memory: []u8 = &[_]u8{}, + + // used for utf8 decoding + buffered: struct { + buf: [3]u8 = .{0} ** 3, + len: u2 = 0, + + pub fn slice(this: *@This()) []const u8 { + return this.buf[0..this.len]; + } + } = .{}, + + // used for utf16 decoding + lead_byte: ?u8 = null, + lead_surrogate: ?u16 = null, + ignore_bom: bool = false, fatal: bool = false, encoding: EncodingLabel = EncodingLabel.@"UTF-8", + pub usingnamespace bun.New(TextDecoder); + pub fn finalize(this: *TextDecoder) void { - bun.default_allocator.destroy(this); + this.destroy(); } pub usingnamespace JSC.Codegen.JSTextDecoder; @@ -458,163 +714,149 @@ pub const TextDecoder = struct { const Vector16 = std.meta.Vector(16, u16); const max_16_ascii: Vector16 = @splat(@as(u16, 127)); - fn decodeUTF16WithAlignment( - _: *TextDecoder, - comptime Slice: type, - slice: Slice, - ctx: js.JSContextRef, - ) JSC.JSValue { - var i: usize = 0; - - while (i < slice.len) { - while (i + strings.ascii_u16_vector_size <= slice.len) { - const vec: strings.AsciiU16Vector = slice[i..][0..strings.ascii_u16_vector_size].*; - if ((@reduce( - .Or, - @as( - strings.AsciiVectorU16U1, - @bitCast(vec > strings.max_u16_ascii), - ) | @as( - strings.AsciiVectorU16U1, - @bitCast(vec < strings.min_u16_ascii), - ), - ) == 0)) { - break; - } - i += 
strings.ascii_u16_vector_size; - } - while (i < slice.len and slice[i] <= 127) { - i += 1; + fn processCodeUnitUTF16( + this: *TextDecoder, + output: *std.ArrayListUnmanaged(u16), + saw_error: *bool, + code_unit: u16, + ) error{OutOfMemory}!void { + if (this.lead_surrogate) |lead_surrogate| { + this.lead_surrogate = null; + + if (strings.u16IsTrail(code_unit)) { + // TODO: why is this here? + // const code_point = strings.u16GetSupplementary(lead_surrogate, code_unit); + try output.appendSlice( + bun.default_allocator, + &.{ lead_surrogate, code_unit }, + ); + return; } - break; + try output.append(bun.default_allocator, strings.unicode_replacement); + saw_error.* = true; } - // is this actually a UTF-16 string that is just ascii? - // we can still allocate as UTF-16 and just copy the bytes - if (i == slice.len) { - if (comptime Slice == []u16) { - return ZigString.init16(slice).toJS(ctx); - } else { - var str = ZigString.init(""); - str._unsafe_ptr_do_not_use = @as([*]const u8, @ptrCast(slice.ptr)); - str.len = slice.len; - str.markUTF16(); - return str.toJS(ctx.ptr()); - } + if (strings.u16IsLead(code_unit)) { + this.lead_surrogate = code_unit; + return; } - var buffer = std.ArrayListAlignedUnmanaged(u16, @alignOf(@TypeOf(slice.ptr))){}; - // copy the allocator to reduce the number of threadlocal accesses - const allocator = VirtualMachine.get().allocator; - buffer.ensureTotalCapacity(allocator, slice.len) catch unreachable; - buffer.items.len = i; - - var len = std.mem.sliceAsBytes(slice[0..i]).len; - @memcpy( - std.mem.sliceAsBytes(buffer.items)[0..len], - std.mem.sliceAsBytes(slice)[0..len], - ); - - const first_high_surrogate = 0xD800; - const last_high_surrogate = 0xDBFF; - const first_low_surrogate = 0xDC00; - const last_low_surrogate = 0xDFFF; - - var remainder = slice[i..]; - while (remainder.len > 0) { - switch (remainder[0]) { - 0...127 => { - const count: usize = if (strings.firstNonASCII16(Slice, remainder)) |index| index + 1 else remainder.len; - - 
buffer.ensureUnusedCapacity(allocator, count) catch unreachable; - - const prev = buffer.items.len; - buffer.items.len += count; - // Since this string is freshly allocated, we know it's not going to overlap - len = std.mem.sliceAsBytes(remainder[0..count]).len; - @memcpy( - std.mem.sliceAsBytes(buffer.items[prev..])[0..len], - std.mem.sliceAsBytes(remainder)[0..len], - ); - remainder = remainder[count..]; - }, - first_high_surrogate...last_high_surrogate => |first| { - if (remainder.len > 1) { - if (remainder[1] >= first_low_surrogate and remainder[1] <= last_low_surrogate) { - buffer.ensureUnusedCapacity(allocator, 2) catch unreachable; - buffer.items.ptr[buffer.items.len] = first; - buffer.items.ptr[buffer.items.len + 1] = remainder[1]; - buffer.items.len += 2; - remainder = remainder[2..]; - continue; - } - } - buffer.ensureUnusedCapacity(allocator, 1) catch unreachable; - buffer.items.ptr[buffer.items.len] = strings.unicode_replacement; - buffer.items.len += 1; - remainder = remainder[1..]; - continue; - }, - // BOM handling - 0xFEFF => { - buffer.ensureTotalCapacity(allocator, 1) catch unreachable; - buffer.items.ptr[buffer.items.len] = remainder[0]; - buffer.items.len += 1; - remainder = remainder[1..]; - }, - - // Is this an unpaired low surrogate or four-digit hex escape? 
- else => { - buffer.ensureUnusedCapacity(allocator, 1) catch unreachable; - buffer.items.ptr[buffer.items.len] = strings.unicode_replacement; - buffer.items.len += 1; - remainder = remainder[1..]; - }, + if (strings.u16IsTrail(code_unit)) { + try output.append(bun.default_allocator, strings.unicode_replacement); + saw_error.* = true; + return; + } + + try output.append(bun.default_allocator, code_unit); + return; + } + + pub fn codeUnitFromBytesUTF16( + first: u16, + second: u16, + comptime big_endian: bool, + ) u16 { + return if (comptime big_endian) + (first << 8) | second + else + first | (second << 8); + } + + pub fn decodeUTF16( + this: *TextDecoder, + bytes: []const u8, + comptime big_endian: bool, + comptime flush: bool, + ) error{OutOfMemory}!struct { std.ArrayListUnmanaged(u16), bool } { + var output: std.ArrayListUnmanaged(u16) = .{}; + try output.ensureTotalCapacity(bun.default_allocator, @divFloor(bytes.len, 2)); + + var remain = bytes; + var saw_error = false; + + if (this.lead_byte) |lead_byte| { + if (remain.len > 0) { + this.lead_byte = null; + + try this.processCodeUnitUTF16( + &output, + &saw_error, + codeUnitFromBytesUTF16(@intCast(lead_byte), @intCast(remain[0]), big_endian), + ); + remain = remain[1..]; } } - const full = buffer.toOwnedSlice(allocator) catch @panic("TODO"); + var i: usize = 0; + + while (i < remain.len -| 1) { + try this.processCodeUnitUTF16( + &output, + &saw_error, + codeUnitFromBytesUTF16(@intCast(remain[i]), @intCast(remain[i + 1]), big_endian), + ); + i += 2; + } + + if (remain.len != 0 and i == remain.len - 1) { + this.lead_byte = remain[i]; + } else { + bun.assertWithLocation(i == remain.len, @src()); + } + + if (comptime flush) { + if (this.lead_byte != null or this.lead_surrogate != null) { + this.lead_byte = null; + this.lead_surrogate = null; + try output.append(bun.default_allocator, strings.unicode_replacement); + saw_error = true; + return .{ output, saw_error }; + } + } - var out = ZigString.init(""); - 
out._unsafe_ptr_do_not_use = @as([*]u8, @ptrCast(full.ptr)); - out.len = full.len; - out.markUTF16(); - return out.toJS(ctx.ptr()); + return .{ output, saw_error }; } pub fn decode(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, callframe: *JSC.CallFrame) JSValue { - const arguments_ = callframe.arguments(2); - const arguments = arguments_.ptr[0..arguments_.len]; + const arguments = callframe.arguments(2).slice(); - if (arguments.len < 1 or arguments[0].isUndefined()) { - return ZigString.Empty.toJS(globalThis); - } + const input_slice = input_slice: { + if (arguments.len == 0 or arguments[0].isUndefined()) { + break :input_slice ""; + } + + if (arguments[0].asArrayBuffer(globalThis)) |array_buffer| { + break :input_slice array_buffer.slice(); + } - const array_buffer = arguments[0].asArrayBuffer(globalThis) orelse { globalThis.throwInvalidArguments("TextDecoder.decode expects an ArrayBuffer or TypedArray", .{}); - return JSValue.zero; + return .zero; }; - if (arguments.len > 1 and arguments[1].isObject()) { - if (arguments[1].fastGet(globalThis, .stream)) |stream| { - if (stream.coerce(bool, globalThis)) { - return this.decodeSlice(globalThis, array_buffer.slice(), true); - } - - if (globalThis.hasException()) { - return JSValue.zero; + const stream = stream: { + if (arguments.len > 1 and arguments[1].isObject()) { + if (arguments[1].fastGet(globalThis, .stream)) |stream_value| { + const stream_bool = stream_value.coerce(bool, globalThis); + if (globalThis.hasException()) { + return .zero; + } + break :stream stream_bool; } } - } - return this.decodeSlice(globalThis, array_buffer.slice(), false); + break :stream false; + }; + + return switch (!stream) { + inline else => |flush| this.decodeSlice(globalThis, input_slice, flush), + }; } pub fn decodeWithoutTypeChecks(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, uint8array: *JSC.JSUint8Array) JSValue { return this.decodeSlice(globalThis, uint8array.slice(), false); } - fn decodeSlice(this: *TextDecoder, 
globalThis: *JSC.JSGlobalObject, buffer_slice: []const u8, comptime stream: bool) JSValue { + fn decodeSlice(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, buffer_slice: []const u8, comptime flush: bool) JSValue { switch (this.encoding) { EncodingLabel.latin1 => { if (strings.isAllASCII(buffer_slice)) { @@ -635,63 +877,85 @@ pub const TextDecoder = struct { return ZigString.toExternalU16(bytes.ptr, out.written, globalThis); }, EncodingLabel.@"UTF-8" => { - const toUTF16 = if (stream) strings.toUTF16Alloc else strings.toUTF16AllocNoTrim; - const moved_buffer_slice_8 = if (!this.ignore_bom and buffer_slice.len > 3 and std.mem.eql(u8, &[_]u8{ '\xEF', '\xBB', '\xBF' }, buffer_slice[0..3])) - buffer_slice[3..] - else - buffer_slice; + const input, const deinit = input: { + const maybe_without_bom = if (!this.ignore_bom and strings.hasPrefixComptime(buffer_slice, "\xef\xbb\xbf")) + buffer_slice[3..] + else + buffer_slice; + + if (this.buffered.len > 0) { + defer this.buffered.len = 0; + const joined = bun.default_allocator.alloc(u8, maybe_without_bom.len + this.buffered.len) catch { + globalThis.throwOutOfMemory(); + return .zero; + }; + @memcpy(joined[0..this.buffered.len], this.buffered.slice()); + @memcpy(joined[this.buffered.len..][0..maybe_without_bom.len], maybe_without_bom); + break :input .{ joined, true }; + } - if (this.fatal) { - if (toUTF16(default_allocator, moved_buffer_slice_8, true, false)) |result_| { - if (result_) |result| { - return ZigString.toExternalU16(result.ptr, result.len, globalThis); - } - } else |err| { - switch (err) { - error.InvalidByteSequence => { + break :input .{ maybe_without_bom, false }; + }; + + const maybe_decode_result = switch (this.fatal) { + inline else => |fail_if_invalid| strings.toUTF16AllocMaybeBuffered(bun.default_allocator, input, fail_if_invalid, flush) catch |err| { + if (deinit) bun.default_allocator.free(input); + if (comptime fail_if_invalid) { + if (err == error.InvalidByteSequence) { 
globalThis.ERR_ENCODING_INVALID_ENCODED_DATA("Invalid byte sequence", .{}).throw(); return .zero; - }, - error.OutOfMemory => { - globalThis.throwOutOfMemory(); - return JSValue.zero; - }, - } - } - } else { - if (toUTF16(default_allocator, moved_buffer_slice_8, false, false)) |result_| { - if (result_) |result| { - return ZigString.toExternalU16(result.ptr, result.len, globalThis); + } } - } else |err| { - switch (err) { - error.OutOfMemory => { - globalThis.throwOutOfMemory(); - return JSValue.zero; - }, + + bun.assert(err == error.OutOfMemory); + globalThis.throwOutOfMemory(); + return .zero; + }, + }; + + if (maybe_decode_result) |decode_result| { + if (deinit) bun.default_allocator.free(input); + const decoded, const leftover, const leftover_len = decode_result; + bun.assert(this.buffered.len == 0); + if (comptime !flush) { + if (leftover_len != 0) { + this.buffered.buf = leftover; + this.buffered.len = leftover_len; } } + return ZigString.toExternalU16(decoded.ptr, decoded.len, globalThis); } + bun.debugAssert(input.len == 0 or !deinit); + // Experiment: using mimalloc directly is slightly slower - return ZigString.init(moved_buffer_slice_8).toJS(globalThis); + return ZigString.init(input).toJS(globalThis); }, - EncodingLabel.@"UTF-16LE" => { - const moved_buffer_slice_16 = if (!this.ignore_bom and buffer_slice.len > 2 and std.mem.eql(u8, &[_]u8{ '\xFF', '\xFE' }, buffer_slice[0..2])) + inline .@"UTF-16LE", .@"UTF-16BE" => |utf16_encoding| { + const bom = if (comptime utf16_encoding == .@"UTF-16LE") "\xff\xfe" else "\xfe\xff"; + const input = if (!this.ignore_bom and strings.hasPrefixComptime(buffer_slice, bom)) buffer_slice[2..] 
else buffer_slice; - if (std.mem.isAligned(@intFromPtr(moved_buffer_slice_16.ptr), @alignOf([*]const u16))) { - return this.decodeUTF16WithAlignment([]align(2) const u16, @as([]align(2) const u16, @alignCast(std.mem.bytesAsSlice(u16, moved_buffer_slice_16))), globalThis); + var decoded, const saw_error = this.decodeUTF16(input, utf16_encoding == .@"UTF-16BE", flush) catch { + globalThis.throwOutOfMemory(); + return .zero; + }; + + if (saw_error and this.fatal) { + decoded.deinit(bun.default_allocator); + globalThis.ERR_ENCODING_INVALID_ENCODED_DATA("The encoded data was not valid {s} data", .{@tagName(utf16_encoding)}).throw(); + return .zero; } - return this.decodeUTF16WithAlignment([]align(1) const u16, std.mem.bytesAsSlice(u16, moved_buffer_slice_16), globalThis); + var output = bun.String.fromUTF16(decoded.items); + return output.toJS(globalThis); }, else => { globalThis.throwInvalidArguments("TextDecoder.decode set to unsupported encoding", .{}); - return JSValue.zero; + return .zero; }, } } @@ -708,7 +972,7 @@ pub const TextDecoder = struct { if (arguments.len > 0) { // encoding if (arguments[0].isString()) { - var str = arguments[0].toSlice(globalThis, default_allocator); + var str = arguments[0].toSlice(globalThis, bun.default_allocator); defer if (str.isAllocated()) str.deinit(); if (EncodingLabel.which(str.slice())) |label| { @@ -753,9 +1017,7 @@ pub const TextDecoder = struct { } } - const result = getAllocator(globalThis).create(TextDecoder) catch unreachable; - result.* = decoder; - return result; + return TextDecoder.new(decoder); } }; diff --git a/src/js/builtins/BunBuiltinNames.h b/src/js/builtins/BunBuiltinNames.h index 1ea5e539866ae7..add726a63e3820 100644 --- a/src/js/builtins/BunBuiltinNames.h +++ b/src/js/builtins/BunBuiltinNames.h @@ -220,9 +220,11 @@ using namespace JSC; macro(stream) \ macro(structuredCloneForStream) \ macro(syscall) \ + macro(textDecoder) \ macro(textDecoderStreamDecoder) \ macro(textDecoderStreamTransform) \ 
macro(textEncoderStreamEncoder) \ + macro(TextEncoderStreamEncoder) \ macro(textEncoderStreamTransform) \ macro(toNamespacedPath) \ macro(trace) \ diff --git a/src/js/builtins/TextDecoderStream.ts b/src/js/builtins/TextDecoderStream.ts new file mode 100644 index 00000000000000..2a5f1e528d92ae --- /dev/null +++ b/src/js/builtins/TextDecoderStream.ts @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2020 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +export function initializeTextDecoderStream() { + const label = arguments.length >= 1 ? arguments[0] : "utf-8"; + const options = arguments.length >= 2 ? 
arguments[1] : {}; + + const startAlgorithm = () => { + return Promise.$resolve(); + }; + const transformAlgorithm = chunk => { + const decoder = $getByIdDirectPrivate(this, "textDecoder"); + let buffer; + try { + buffer = decoder.decode(chunk, { stream: true }); + } catch (e) { + return Promise.$reject(e); + } + if (buffer) { + const transformStream = $getByIdDirectPrivate(this, "textDecoderStreamTransform"); + const controller = $getByIdDirectPrivate(transformStream, "controller"); + $transformStreamDefaultControllerEnqueue(controller, buffer); + } + return Promise.$resolve(); + }; + const flushAlgorithm = () => { + const decoder = $getByIdDirectPrivate(this, "textDecoder"); + let buffer; + try { + buffer = decoder.decode(undefined, { stream: false }); + } catch (e) { + return Promise.$reject(e); + } + if (buffer) { + const transformStream = $getByIdDirectPrivate(this, "textDecoderStreamTransform"); + const controller = $getByIdDirectPrivate(transformStream, "controller"); + $transformStreamDefaultControllerEnqueue(controller, buffer); + } + return Promise.$resolve(); + }; + + const transform = $createTransformStream(startAlgorithm, transformAlgorithm, flushAlgorithm); + $putByIdDirectPrivate(this, "textDecoderStreamTransform", transform); + + const fatal = !!options.fatal; + const ignoreBOM = !!options.ignoreBOM; + const decoder = new TextDecoder(label, { fatal, ignoreBOM }); + + $putByIdDirectPrivate(this, "fatal", fatal); + $putByIdDirectPrivate(this, "ignoreBOM", ignoreBOM); + $putByIdDirectPrivate(this, "encoding", decoder.encoding); + $putByIdDirectPrivate(this, "textDecoder", decoder); + + return this; +} + +$getter; +export function encoding() { + if (!$getByIdDirectPrivate(this, "textDecoderStreamTransform")) + throw $makeThisTypeError("TextDecoderStream", "encoding"); + + return $getByIdDirectPrivate(this, "encoding"); +} + +$getter; +export function fatal() { + if (!$getByIdDirectPrivate(this, "textDecoderStreamTransform")) + throw 
$makeThisTypeError("TextDecoderStream", "fatal"); + + return $getByIdDirectPrivate(this, "fatal"); +} + +$getter; +export function ignoreBOM() { + if (!$getByIdDirectPrivate(this, "textDecoderStreamTransform")) + throw $makeThisTypeError("TextDecoderStream", "ignoreBOM"); + + return $getByIdDirectPrivate(this, "ignoreBOM"); +} + +$getter; +export function readable() { + const transform = $getByIdDirectPrivate(this, "textDecoderStreamTransform"); + if (!transform) throw $makeThisTypeError("TextDecoderStream", "readable"); + + return $getByIdDirectPrivate(transform, "readable"); +} + +$getter; +export function writable() { + const transform = $getByIdDirectPrivate(this, "textDecoderStreamTransform"); + if (!transform) throw $makeThisTypeError("TextDecoderStream", "writable"); + + return $getByIdDirectPrivate(transform, "writable"); +} diff --git a/src/js/builtins/TextEncoderStream.ts b/src/js/builtins/TextEncoderStream.ts new file mode 100644 index 00000000000000..daf38e83413392 --- /dev/null +++ b/src/js/builtins/TextEncoderStream.ts @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2020 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +export function initializeTextEncoderStream() { + const startAlgorithm = () => { + return Promise.$resolve(); + }; + const transformAlgorithm = chunk => { + const encoder = $getByIdDirectPrivate(this, "textEncoderStreamEncoder"); + let buffer; + try { + buffer = encoder.encode(chunk); + } catch (e) { + return Promise.$reject(e); + } + if (buffer) { + const transformStream = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); + const controller = $getByIdDirectPrivate(transformStream, "controller"); + $transformStreamDefaultControllerEnqueue(controller, buffer); + } + return Promise.$resolve(); + }; + const flushAlgorithm = () => { + const encoder = $getByIdDirectPrivate(this, "textEncoderStreamEncoder"); + const buffer = encoder.flush(); + if (buffer) { + const transformStream = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); + const controller = $getByIdDirectPrivate(transformStream, "controller"); + $transformStreamDefaultControllerEnqueue(controller, buffer); + } + return Promise.$resolve(); + }; + + const transform = $createTransformStream(startAlgorithm, transformAlgorithm, flushAlgorithm); + $putByIdDirectPrivate(this, "textEncoderStreamTransform", transform); + $putByIdDirectPrivate(this, "textEncoderStreamEncoder", new $TextEncoderStreamEncoder()); + + return this; +} + +$getter; +export function encoding() { + if (!$getByIdDirectPrivate(this, "textEncoderStreamTransform")) + throw 
$makeThisTypeError("TextEncoderStream", "encoding"); + + return "utf-8"; +} + +$getter; +export function readable() { + const transform = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); + if (!transform) throw $makeThisTypeError("TextEncoderStream", "readable"); + + return $getByIdDirectPrivate(transform, "readable"); +} + +$getter; +export function writable() { + const transform = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); + if (!transform) throw $makeThisTypeError("TextEncoderStream", "writable"); + + return $getByIdDirectPrivate(transform, "writable"); +} diff --git a/src/js/node/stream.web.ts b/src/js/node/stream.web.ts index 25160c9de4c0de..a0d92215bd5ef0 100644 --- a/src/js/node/stream.web.ts +++ b/src/js/node/stream.web.ts @@ -13,8 +13,8 @@ export default { WritableStreamDefaultController, ByteLengthQueuingStrategy, CountQueuingStrategy, - TextEncoderStream: undefined, - TextDecoderStream: undefined, + TextEncoderStream, + TextDecoderStream, CompressionStream: undefined, DecompressionStream: undefined, }; diff --git a/src/js_ast.zig b/src/js_ast.zig index 4b8f598677888e..f122ab662fe3d6 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -2545,7 +2545,7 @@ pub const E = struct { if (s.isUTF8()) { return JSC.ZigString.fromUTF8(s.slice(allocator)); } else { - return JSC.ZigString.init16(s.slice16()); + return JSC.ZigString.initUTF16(s.slice16()); } } diff --git a/src/string.zig b/src/string.zig index 75188e6136bae9..e7cfbc505b52ef 100644 --- a/src/string.zig +++ b/src/string.zig @@ -92,7 +92,7 @@ pub const WTFStringImplStruct = extern struct { if (this.is8Bit()) { return ZigString.init(this.latin1Slice()); } else { - return ZigString.init16(this.utf16Slice()); + return ZigString.initUTF16(this.utf16Slice()); } } @@ -668,6 +668,10 @@ pub const String = extern struct { return String.init(ZigString.initUTF8(value)); } + pub fn fromUTF16(value: []const u16) String { + return String.init(ZigString.initUTF16(value)); + } + pub fn 
fromBytes(value: []const u8) String { return String.init(ZigString.fromBytes(value)); } @@ -906,7 +910,7 @@ pub const String = extern struct { if (this.value.WTFStringImpl.is8Bit()) { return String.init(ZigString.init(this.value.WTFStringImpl.latin1Slice()[start_index..end_index])); } else { - return String.init(ZigString.init16(this.value.WTFStringImpl.utf16Slice()[start_index..end_index])); + return String.init(ZigString.initUTF16(this.value.WTFStringImpl.utf16Slice()[start_index..end_index])); } }, else => return this, diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 1b4342865bfe7a..ba6211d3cd6240 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1340,15 +1340,7 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa var remaining = bytes[i..]; { - const sequence: [4]u8 = switch (remaining.len) { - 0 => unreachable, - 1 => [_]u8{ remaining[0], 0, 0, 0 }, - 2 => [_]u8{ remaining[0], remaining[1], 0, 0 }, - 3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 }, - else => remaining[0..4].*, - }; - - const replacement = strings.convertUTF8BytesIntoUTF16(&sequence); + const replacement = strings.convertUTF8BytesIntoUTF16(remaining); if (comptime fail_if_invalid) { if (replacement.fail) { if (comptime Environment.allow_assert) assert(replacement.code_point == unicode_replacement); @@ -1375,15 +1367,7 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa strings.copyU8IntoU16(output.items[end..][0..j], remaining[0..j]); remaining = remaining[j..]; - const sequence: [4]u8 = switch (remaining.len) { - 0 => unreachable, - 1 => [_]u8{ remaining[0], 0, 0, 0 }, - 2 => [_]u8{ remaining[0], remaining[1], 0, 0 }, - 3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 }, - else => remaining[0..4].*, - }; - - const replacement = strings.convertUTF8BytesIntoUTF16(&sequence); + const replacement = strings.convertUTF8BytesIntoUTF16(remaining); if (comptime fail_if_invalid) { 
if (replacement.fail) { if (comptime Environment.allow_assert) assert(replacement.code_point == unicode_replacement); @@ -1436,6 +1420,101 @@ pub fn toUTF16AllocForReal(allocator: std.mem.Allocator, bytes: []const u8, comp }; } +pub fn toUTF16AllocMaybeBuffered( + allocator: std.mem.Allocator, + bytes: []const u8, + comptime fail_if_invalid: bool, + comptime flush: bool, +) error{ OutOfMemory, InvalidByteSequence }!?struct { []u16, [3]u8, u2 } { + const first_non_ascii = strings.firstNonASCII(bytes) orelse return null; + + var output: std.ArrayListUnmanaged(u16) = if (comptime bun.FeatureFlags.use_simdutf) output: { + const out_length = bun.simdutf.length.utf16.from.utf8(bytes); + + if (out_length == 0) { + break :output .{}; + } + + var out = try allocator.alloc(u16, out_length); + + const res = bun.simdutf.convert.utf8.to.utf16.with_errors.le(bytes, out); + if (res.status == .success) { + log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, out_length }); + return .{ out, .{0} ** 3, 0 }; + } + + var list = std.ArrayListUnmanaged(u16).fromOwnedSlice(out[0..first_non_ascii]); + list.capacity = out.len; + + break :output list; + } else .{}; + errdefer output.deinit(allocator); + + const start = if (output.items.len > 0) first_non_ascii else 0; + var remaining = bytes[start..]; + + var non_ascii: ?u32 = 0; + while (non_ascii) |i| : (non_ascii = strings.firstNonASCII(remaining)) { + { + const end = output.items.len; + try output.ensureUnusedCapacity(allocator, i + 2); // +2 for UTF16 codepoint + output.items.len += i; + strings.copyU8IntoU16(output.items[end..][0..i], remaining[0..i]); + remaining = remaining[i..]; + } + + const sequence: [4]u8 = switch (remaining.len) { + 0 => unreachable, + 1 => .{ remaining[0], 0, 0, 0 }, + 2 => .{ remaining[0], remaining[1], 0, 0 }, + 3 => .{ remaining[0], remaining[1], remaining[2], 0 }, + else => remaining[0..4].*, + }; + + const converted_length = strings.nonASCIISequenceLength(sequence[0]); + + const converted = 
strings.convertUTF8BytesIntoUTF16WithLength(&sequence, converted_length, remaining.len); + + if (comptime !flush) { + if (converted.fail and converted.can_buffer and converted_length > remaining.len) { + const buffered: [3]u8 = switch (remaining.len) { + else => unreachable, + 1 => .{ remaining[0], 0, 0 }, + 2 => .{ remaining[0], remaining[1], 0 }, + 3 => .{ remaining[0], remaining[1], remaining[2] }, + }; + return .{ output.items, buffered, @intCast(remaining.len) }; + } + } + + if (comptime fail_if_invalid) { + if (converted.fail) { + if (comptime Environment.allow_assert) { + bun.assert(converted.code_point == unicode_replacement); + } + return error.InvalidByteSequence; + } + } + + remaining = remaining[@max(converted.len, 1)..]; + + // #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + switch (converted.code_point) { + 0...0xffff => |c| output.appendAssumeCapacity(@intCast(c)), + else => |c| output.appendSliceAssumeCapacity(&.{ strings.u16Lead(c), strings.u16Trail(c) }), + } + } + + if (remaining.len > 0) { + try output.ensureTotalCapacityPrecise(allocator, output.items.len + remaining.len); + output.items.len += remaining.len; + strings.copyU8IntoU16(output.items[output.items.len - remaining.len ..], remaining); + } + + log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, output.items.len }); + return .{ output.items, .{0} ** 3, 0 }; +} + pub fn toUTF16AllocNoTrim(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool, comptime _: bool) !?[]u16 { if (strings.firstNonASCII(bytes)) |i| { const output_: ?std.ArrayList(u16) = if (comptime bun.FeatureFlags.use_simdutf) simd: { @@ -1474,15 +1553,7 @@ pub fn toUTF16AllocNoTrim(allocator: std.mem.Allocator, bytes: []const u8, compt var remaining = bytes[i..]; { - const sequence: [4]u8 = switch (remaining.len) { - 0 => unreachable, - 1 => [_]u8{ remaining[0], 0, 0, 0 }, - 2 => [_]u8{ remaining[0], remaining[1], 0, 0 }, - 3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 }, - else => 
remaining[0..4].*, - }; - - const replacement = strings.convertUTF8BytesIntoUTF16(&sequence); + const replacement = strings.convertUTF8BytesIntoUTF16(remaining); if (comptime fail_if_invalid) { if (replacement.fail) { if (comptime Environment.allow_assert) assert(replacement.code_point == unicode_replacement); @@ -1509,15 +1580,7 @@ pub fn toUTF16AllocNoTrim(allocator: std.mem.Allocator, bytes: []const u8, compt strings.copyU8IntoU16(output.items[end..][0..j], remaining[0..j]); remaining = remaining[j..]; - const sequence: [4]u8 = switch (remaining.len) { - 0 => unreachable, - 1 => [_]u8{ remaining[0], 0, 0, 0 }, - 2 => [_]u8{ remaining[0], remaining[1], 0, 0 }, - 3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 }, - else => remaining[0..4].*, - }; - - const replacement = strings.convertUTF8BytesIntoUTF16(&sequence); + const replacement = strings.convertUTF8BytesIntoUTF16(remaining); if (comptime fail_if_invalid) { if (replacement.fail) { if (comptime Environment.allow_assert) assert(replacement.code_point == unicode_replacement); @@ -2076,7 +2139,9 @@ pub const UTF16Replacement = struct { /// and a genuine error. fail: bool = false, - pub inline fn utf8Width(replacement: UTF16Replacement) usize { + can_buffer: bool = true, + + pub inline fn utf8Width(replacement: UTF16Replacement) u3 { return switch (replacement.code_point) { 0...0x7F => 1, (0x7F + 1)...0x7FF => 2, @@ -2086,10 +2151,8 @@ pub const UTF16Replacement = struct { } }; -// This variation matches WebKit behavior. 
-fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement { +fn convertUTF8BytesIntoUTF16WithLength(sequence: *const [4]u8, len: u3, remaining_len: usize) UTF16Replacement { if (comptime Environment.allow_assert) assert(sequence[0] > 127); - const len = nonASCIISequenceLength(sequence[0]); switch (len) { 2 => { if (comptime Environment.allow_assert) { @@ -2097,7 +2160,7 @@ fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement { bun.assert(sequence[0] <= 0xDF); } if (sequence[1] < 0x80 or sequence[1] > 0xBF) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } return .{ .len = len, .code_point = ((@as(u32, sequence[0]) << 6) + @as(u32, sequence[1])) - 0x00003080 }; }, @@ -2109,22 +2172,22 @@ fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement { switch (sequence[0]) { 0xE0 => { if (sequence[1] < 0xA0 or sequence[1] > 0xBF) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } }, 0xED => { if (sequence[1] < 0x80 or sequence[1] > 0x9F) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } }, else => { if (sequence[1] < 0x80 or sequence[1] > 0xBF) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } }, } if (sequence[2] < 0x80 or sequence[2] > 0xBF) { - return .{ .len = 2, .fail = true }; + return .{ .len = 2, .fail = true, .can_buffer = remaining_len < 3 }; } return .{ .len = len, @@ -2135,36 +2198,36 @@ fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement { switch (sequence[0]) { 0xF0 => { if (sequence[1] < 0x90 or sequence[1] > 0xBF) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } }, 0xF4 => { if (sequence[1] < 0x80 or sequence[1] > 0x8F) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = 
true, .can_buffer = remaining_len < 2 }; } }, // invalid code point // this used to be an assertion 0...(0xF0 - 1), 0xF4 + 1...std.math.maxInt(@TypeOf(sequence[0])) => { - return UTF16Replacement{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = false }; }, else => { if (sequence[1] < 0x80 or sequence[1] > 0xBF) { - return .{ .len = 1, .fail = true }; + return .{ .len = 1, .fail = true, .can_buffer = remaining_len < 2 }; } }, } if (sequence[2] < 0x80 or sequence[2] > 0xBF) { - return .{ .len = 2, .fail = true }; + return .{ .len = 2, .fail = true, .can_buffer = remaining_len < 3 }; } if (sequence[3] < 0x80 or sequence[3] > 0xBF) { - return .{ .len = 3, .fail = true }; + return .{ .len = 3, .fail = true, .can_buffer = remaining_len < 4 }; } return .{ - .len = 4, + .len = len, .code_point = ((@as(u32, sequence[0]) << 18) + (@as(u32, sequence[1]) << 12) + (@as(u32, sequence[2]) << 6) + @as(u32, sequence[3])) - 0x03C82080, @@ -2176,6 +2239,21 @@ fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8) UTF16Replacement { } } +// This variation matches WebKit behavior. 
+// fn convertUTF8BytesIntoUTF16(sequence: *const [4]u8, remaining_len: usize) UTF16Replacement { +fn convertUTF8BytesIntoUTF16(bytes: []const u8) UTF16Replacement { + const sequence: [4]u8 = switch (bytes.len) { + 0 => unreachable, + 1 => [_]u8{ bytes[0], 0, 0, 0 }, + 2 => [_]u8{ bytes[0], bytes[1], 0, 0 }, + 3 => [_]u8{ bytes[0], bytes[1], bytes[2], 0 }, + else => bytes[0..4].*, + }; + if (comptime Environment.allow_assert) assert(sequence[0] > 127); + const sequence_length = nonASCIISequenceLength(sequence[0]); + return convertUTF8BytesIntoUTF16WithLength(&sequence, sequence_length, bytes.len); +} + pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) EncodeIntoResult { return copyLatin1IntoUTF8StopOnNonASCII(buf_, Type, latin1_, false); } @@ -3337,6 +3415,35 @@ pub fn encodeWTF8RuneT(p: *[4]u8, comptime R: type, r: R) u3 { } } +pub fn wtf8Sequence(code_point: u32) [4]u8 { + return switch (code_point) { + 0...0x7f => .{ + @intCast(code_point), + 0, + 0, + 0, + }, + (0x7f + 1)...0x7ff => .{ + @truncate(0xc0 | (code_point >> 6)), + @truncate(0x80 | (code_point & 0x3f)), + 0, + 0, + }, + (0x7ff + 1)...0xffff => .{ + @truncate(0xe0 | (code_point >> 12)), + @truncate(0x80 | ((code_point >> 6) & 0x3f)), + @truncate(0x80 | (code_point & 0x3f)), + 0, + }, + else => .{ + @truncate(0xf0 | (code_point >> 18)), + @truncate(0x80 | ((code_point >> 12) & 0x3f)), + @truncate(0x80 | ((code_point >> 6) & 0x3f)), + @truncate(0x80 | (code_point & 0x3f)), + }, + }; +} + pub inline fn wtf8ByteSequenceLength(first_byte: u8) u3 { return switch (first_byte) { 0 => 0, @@ -3521,16 +3628,36 @@ pub fn isAllASCII(slice: []const u8) bool { return true; } -//#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) +// #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) pub inline fn u16Lead(supplementary: anytype) u16 { - return @as(u16, @intCast((supplementary >> 10) + 0xd7c0)); + return @intCast((supplementary >> 10) + 0xd7c0); } -//#define 
U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) +// #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) pub inline fn u16Trail(supplementary: anytype) u16 { - return @as(u16, @intCast((supplementary & 0x3ff) | 0xdc00)); + return @intCast((supplementary & 0x3ff) | 0xdc00); +} + +// #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) +pub inline fn u16IsTrail(supplementary: u16) bool { + return (@as(u32, @intCast(supplementary)) & 0xfffffc00) == 0xdc00; +} + +// #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) +pub inline fn u16IsLead(supplementary: u16) bool { + return (@as(u32, @intCast(supplementary)) & 0xfffffc00) == 0xd800; +} + +// #define U16_GET_SUPPLEMENTARY(lead, trail) \ +// (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) +pub inline fn u16GetSupplementary(lead: u32, trail: u32) u32 { + const shifted = lead << 10; + return (shifted + trail) - u16_surrogate_offset; } +// #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) +pub const u16_surrogate_offset = 56613888; + pub fn firstNonASCII(slice: []const u8) ?u32 { return firstNonASCIIWithType([]const u8, slice); } diff --git a/test/harness.ts b/test/harness.ts index 84715b689957f6..638248f6c0d03c 100644 --- a/test/harness.ts +++ b/test/harness.ts @@ -1174,6 +1174,17 @@ export function isMacOSVersionAtLeast(minVersion: number): boolean { return parseFloat(macOSVersion) >= minVersion; } +export function readableStreamFromArray(array) { + return new ReadableStream({ + pull(controller) { + for (let entry of array) { + controller.enqueue(entry); + } + controller.close(); + }, + }); +} + let hasGuardMalloc = -1; export function forceGuardMalloc(env) { if (process.platform !== "darwin") { diff --git a/test/js/web/encoding/encode-bad-chunks.test.ts b/test/js/web/encoding/encode-bad-chunks.test.ts new file mode 100644 index 00000000000000..426664760bf57c --- /dev/null +++ b/test/js/web/encoding/encode-bad-chunks.test.ts @@ -0,0 +1,74 @@ +// META: 
global=window,worker +// META: script=resources/readable-stream-from-array.js +// META: script=resources/readable-stream-to-array.js + +// https://github.com/WebKit/WebKit/blob/443e796d1538654c34f2690e39600c70c8052b63/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/encode-bad-chunks.any.js#L5 + +import { test, expect } from "bun:test"; +import { readableStreamFromArray } from "harness"; + +const error1 = new Error("error1"); +error1.name = "error1"; + +test("a chunk that cannot be converted to a string should error the streams", () => { + const ts = new TextEncoderStream(); + const writer = ts.writable.getWriter(); + const reader = ts.readable.getReader(); + const writePromise = writer.write({ + toString() { + throw error1; + }, + }); + const readPromise = reader.read(); + expect(async () => { + await readPromise; + }).toThrow(error1); + expect(async () => { + await writePromise; + }).toThrow(error1); + expect(async () => { + await reader.closed; + }).toThrow(error1); + expect(async () => { + await writer.closed; + }).toThrow(error1); +}); + +const oddInputs = [ + { + name: "undefined", + value: undefined, + expected: "undefined", + }, + { + name: "null", + value: null, + expected: "null", + }, + { + name: "numeric", + value: 3.14, + expected: "3.14", + }, + { + name: "object", + value: {}, + expected: "[object Object]", + }, + { + name: "array", + value: ["hi"], + expected: "hi", + }, +]; + +for (const input of oddInputs) { + test(`input of type ${input.name} should be converted correctly to string`, async () => { + const outputReadable = readableStreamFromArray([input.value]) + .pipeThrough(new TextEncoderStream()) + .pipeThrough(new TextDecoderStream()); + const output = await Bun.readableStreamToArray(outputReadable); + expect(output.length, "output should contain one chunk").toBe(1); + expect(output[0], "output should be correct").toBe(input.expected); + }); +} diff --git a/test/js/web/encoding/text-decoder-stream.test.ts 
b/test/js/web/encoding/text-decoder-stream.test.ts new file mode 100644 index 00000000000000..8c86f90cee5c27 --- /dev/null +++ b/test/js/web/encoding/text-decoder-stream.test.ts @@ -0,0 +1,167 @@ +import { test, expect } from "bun:test"; +import { readableStreamFromArray } from "harness"; + +{ + // META: global=window,worker + // META: script=resources/readable-stream-from-array.js + // META: script=resources/readable-stream-to-array.js + // META: script=/common/sab.js + + // https://github.com/WebKit/WebKit/blob/443e796d1538654c34f2690e39600c70c8052b63/LayoutTests/imported/w3c/web-platform-tests/encoding/streams/decode-utf8.any.js#L5 + + [ArrayBuffer, SharedArrayBuffer].forEach(arrayBufferOrSharedArrayBuffer => { + const inputChunkData = [73, 32, 240, 159, 146, 153, 32, 115, 116, 114, 101, 97, 109, 115]; + + const emptyChunk = new Uint8Array(new arrayBufferOrSharedArrayBuffer(0)); + const inputChunk = new Uint8Array(new arrayBufferOrSharedArrayBuffer(inputChunkData.length)); + + inputChunk.set(inputChunkData); + + const expectedOutputString = "I \u{1F499} streams"; + + test( + "decoding one UTF-8 chunk should give one output string - " + arrayBufferOrSharedArrayBuffer.name, + async () => { + const input = readableStreamFromArray([inputChunk]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "the output should be in one chunk").toEqual([expectedOutputString]); + }, + ); + + test("decoding an empty chunk should give no output chunks - " + arrayBufferOrSharedArrayBuffer.name, async () => { + const input = readableStreamFromArray([emptyChunk]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "no chunks should be output").toEqual([]); + }); + + test("an initial empty chunk should be ignored - " + arrayBufferOrSharedArrayBuffer.name, async () => { + const input = readableStreamFromArray([emptyChunk, 
inputChunk]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "the output should be in one chunk").toEqual([expectedOutputString]); + }); + + test("a trailing empty chunk should be ignored - " + arrayBufferOrSharedArrayBuffer.name, async () => { + const input = readableStreamFromArray([inputChunk, emptyChunk]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "the output should be in one chunk").toEqual([expectedOutputString]); + }); + + test("UTF-8 EOF handling - " + arrayBufferOrSharedArrayBuffer.name, async () => { + const chunk = new Uint8Array(new arrayBufferOrSharedArrayBuffer(3)); + chunk.set([0xf0, 0x9f, 0x92]); + const input = readableStreamFromArray([chunk]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array).toEqual(["\uFFFD"]); + }); + }); + + test("decoding a transferred Uint8Array chunk should give no output", async () => { + const buffer = new ArrayBuffer(3); + const view = new Uint8Array(buffer, 1, 1); + view[0] = 65; + new MessageChannel().port1.postMessage(buffer, [buffer]); + const input = readableStreamFromArray([view]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "no chunks should be output").toEqual([]); + }); + + test("decoding a transferred ArrayBuffer chunk should give no output", async () => { + const buffer = new ArrayBuffer(1); + new MessageChannel().port1.postMessage(buffer, [buffer]); + const input = readableStreamFromArray([buffer]); + const output = input.pipeThrough(new TextDecoderStream()); + const array = await Bun.readableStreamToArray(output); + expect(array, "no chunks should be output").toEqual([]); + }); +} + +{ + // 
https://github.com/nodejs/node/blob/926503b66910d9ec895c33c7fd94361fd78dea72/test/fixtures/wpt/encoding/streams/decode-attributes.any.js#L3 + + // META: global=window,worker,shadowrealm + + // Verify that constructor arguments are correctly reflected in the attributes. + + // Mapping of the first argument to TextDecoderStream to the expected value of + // the encoding attribute. We assume that if this subset works correctly, the + // rest probably work too. + const labelToName = { + "unicode-1-1-utf-8": "utf-8", + // "iso-8859-2": "iso-8859-2", + "ascii": "windows-1252", + "utf-16": "utf-16le", + }; + + for (const label of Object.keys(labelToName)) { + test(`encoding attribute should have correct value for '${label}'`, () => { + const stream = new TextDecoderStream(label); + expect(stream.encoding, "encoding should match").toBe(labelToName[label]); + }); + } + + for (const falseValue of [false, 0, "", undefined, null]) { + test(`setting fatal to '${falseValue}' should set the attribute to false`, () => { + const stream = new TextDecoderStream("utf-8", { fatal: falseValue }); + expect(stream.fatal, "fatal should be false").toBeFalse(); + }); + + test(`setting ignoreBOM to '${falseValue}' should set the attribute to false`, () => { + const stream = new TextDecoderStream("utf-8", { ignoreBOM: falseValue }); + expect(stream.ignoreBOM, "ignoreBOM should be false").toBeFalse(); + }); + } + + for (const trueValue of [true, 1, {}, [], "yes"]) { + test(`setting fatal to '${trueValue}' should set the attribute to true`, () => { + const stream = new TextDecoderStream("utf-8", { fatal: trueValue }); + expect(stream.fatal, "fatal should be true").toBeTrue(); + }); + + test(`setting ignoreBOM to '${trueValue}' should set the attribute to true`, () => { + const stream = new TextDecoderStream("utf-8", { ignoreBOM: trueValue }); + expect(stream.ignoreBOM, "ignoreBOM should be true").toBeTrue(); + }); + } + + test("constructing with an invalid encoding should throw", () => { + 
expect(() => { + new TextDecoderStream(""); + }).toThrow(TypeError); + }); + + test("constructing with a non-stringifiable encoding should throw", () => { + expect(() => { + new TextDecoderStream({ + toString() { + return {}; + }, + }); + }).toThrow(TypeError); + }); + + test("a throwing fatal member should cause the constructor to throw", () => { + expect(() => { + new TextDecoderStream("utf-8", { + get fatal() { + throw new Error(); + }, + }); + }).toThrow(Error); + }); + + test("a throwing ignoreBOM member should cause the constructor to throw", () => { + expect(() => { + new TextDecoderStream("utf-8", { + get ignoreBOM() { + throw new Error(); + }, + }); + }).toThrow(Error); + }); +} diff --git a/test/js/web/encoding/text-decoder.test.js b/test/js/web/encoding/text-decoder.test.js index 10b29557cfb675..91bba95b7e691c 100644 --- a/test/js/web/encoding/text-decoder.test.js +++ b/test/js/web/encoding/text-decoder.test.js @@ -293,6 +293,9 @@ describe("TextDecoder ignoreBOM", () => { const decoder_not_ignore_bom = new TextDecoder(encoding, { ignoreBOM: false }); expect(decoder_not_ignore_bom.decode(array)).toStrictEqual("abc"); + + const decoder_not_ignore_bom_default = new TextDecoder(encoding); + expect(decoder_not_ignore_bom_default.decode(array)).toStrictEqual(`abc`); }); }); @@ -319,3 +322,218 @@ it.each([ const output = decoder.decode(Uint8Array.from(input)); expect(output).toBe("\uFFFD\uFFFD"); }); + +// https://github.com/nodejs/node/blob/492032f34c1bf264eae01dc5cdfc77c8032b8552/test/fixtures/wpt/encoding/textdecoder-fatal-streaming.any.js#L4 +it("Fatal flag, non-streaming cases", () => { + [ + { encoding: "utf-8", sequence: [0xc0] }, + { encoding: "utf-16le", sequence: [0x00] }, + { encoding: "utf-16be", sequence: [0x00] }, + ].forEach(function (testCase) { + expect( + () => { + var decoder = new TextDecoder(testCase.encoding, { fatal: true }); + decoder.decode(new Uint8Array(testCase.sequence)); + }, + "Unterminated " + testCase.encoding + " sequence 
should throw if fatal flag is set", + ).toThrow(); + + expect( + new TextDecoder(testCase.encoding).decode(new Uint8Array([testCase.sequence])), + "Unterminated UTF-8 sequence should emit replacement character if fatal flag is unset", + ).toBe("\uFFFD"); + }); +}); + +describe("stream", () => { + { + // https://github.com/nodejs/node/blob/492032f34c1bf264eae01dc5cdfc77c8032b8552/test/fixtures/wpt/encoding/textdecoder-arguments.any.js#L3 + it("TextDecoder decode() with explicit undefined", () => { + const decoder = new TextDecoder(); + + // Just passing nothing. + expect(decoder.decode(undefined), "Undefined as first arg should decode to empty string").toBe(""); + + // Flushing an incomplete sequence. + decoder.decode(new Uint8Array([0xc9]), { stream: true }); + expect(decoder.decode(undefined), "Undefined as first arg should flush the stream").toBe("\uFFFD"); + }); + + it("TextDecoder decode() with undefined and undefined", () => { + const decoder = new TextDecoder(); + + // Just passing nothing. + expect(decoder.decode(undefined, undefined), "Undefined as first arg should decode to empty string").toBe(""); + + // Flushing an incomplete sequence. + decoder.decode(new Uint8Array([0xc9]), { stream: true }); + expect(decoder.decode(undefined, undefined), "Undefined as first arg should flush the stream").toBe("\uFFFD"); + }); + + it("TextDecoder decode() with undefined and options", () => { + const decoder = new TextDecoder(); + + // Just passing nothing. + expect(decoder.decode(undefined, {}), "Undefined as first arg should decode to empty string").toBe(""); + + // Flushing an incomplete sequence. 
+ decoder.decode(new Uint8Array([0xc9]), { stream: true }); + expect(decoder.decode(undefined, {}), "Undefined as first arg should flush the stream").toBe("\uFFFD"); + }); + } + { + // https://github.com/nodejs/node/blob/492032f34c1bf264eae01dc5cdfc77c8032b8552/test/fixtures/wpt/encoding/textdecoder-eof.any.js#L14 + it("TextDecoder end-of-queue handling using stream: true", () => { + const decoder = new TextDecoder(); + decoder.decode(new Uint8Array([0xf0]), { stream: true }); + expect(decoder.decode()).toBe("\uFFFD"); + + decoder.decode(new Uint8Array([0xf0]), { stream: true }); + decoder.decode(new Uint8Array([0x9f]), { stream: true }); + expect(decoder.decode()).toBe("\uFFFD"); + + decoder.decode(new Uint8Array([0xf0, 0x9f]), { stream: true }); + expect(decoder.decode(new Uint8Array([0x92]))).toBe("\uFFFD"); + + expect(decoder.decode(new Uint8Array([0xf0, 0x9f]), { stream: true })).toBe(""); + expect(decoder.decode(new Uint8Array([0x41]), { stream: true })).toBe("\uFFFDA"); + expect(decoder.decode()).toBe(""); + + expect(decoder.decode(new Uint8Array([0xf0, 0x41, 0x42]), { stream: true })).toBe("\uFFFDAB"); + expect(decoder.decode()).toBe(""); + + expect(decoder.decode(new Uint8Array([0xf0, 0x41, 0xf0]), { stream: true })).toBe("\uFFFDA"); + expect(decoder.decode()).toBe("\uFFFD"); + + expect(decoder.decode(new Uint8Array([0xf0]), { stream: true })).toBe(""); + expect(decoder.decode(new Uint8Array([0x8f]), { stream: true })).toBe("\uFFFD\uFFFD"); + expect(decoder.decode(new Uint8Array([0x92]), { stream: true })).toBe("\uFFFD"); + expect(decoder.decode()).toBe(""); + }); + } + { + // https://github.com/WebKit/WebKit/blob/443e796d1538654c34f2690e39600c70c8052b63/LayoutTests/imported/w3c/web-platform-tests/encoding/textdecoder-fatal-streaming.any.js#L22 + it("Fatal flag, streaming cases", () => { + var decoder = new TextDecoder("utf-16le", { fatal: true }); + var odd = new Uint8Array([0x00]); + var even = new Uint8Array([0x00, 0x00]); + + expect(decoder.decode(odd, 
{ stream: true })).toBe(""); + expect(decoder.decode(odd, { stream: true })).toBe("\u0000"); + + expect(() => { + decoder.decode(even, { stream: true }); + decoder.decode(odd); + }).toThrow(TypeError); + + expect(() => { + decoder.decode(odd, { stream: true }); + decoder.decode(even); + }).toThrow(TypeError); + + expect(decoder.decode(even, { stream: true })).toBe("\u0000"); + expect(() => { + decoder.decode(odd); + }).toThrow(TypeError); + // expect(decoder.decode(odd)).toBe("\u0000"); + }); + } + { + // https://github.com/nodejs/node/blob/926503b66910d9ec895c33c7fd94361fd78dea72/test/fixtures/wpt/encoding/textdecoder-streaming.any.js#L6 + // META: title=Encoding API: Streaming decode + // META: global=window,worker + // META: script=resources/encodings.js + // META: script=/common/sab.js + + var string = "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF"; + var octets = { + "utf-8": [ + 0x00, 0x31, 0x32, 0x33, 0x41, 0x42, 0x43, 0x61, 0x62, 0x63, 0xc2, 0x80, 0xc3, 0xbf, 0xc4, 0x80, 0xe1, 0x80, + 0x80, 0xef, 0xbf, 0xbd, 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf, + ], + "utf-16le": [ + 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, + 0x63, 0x00, 0x80, 0x00, 0xff, 0x00, 0x00, 0x01, 0x00, 0x10, 0xfd, 0xff, 0x00, 0xd8, 0x00, 0xdc, 0xff, 0xdb, + 0xff, 0xdf, + ], + "utf-16be": [ + 0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00, 0x43, 0x00, 0x61, 0x00, 0x62, + 0x00, 0x63, 0x00, 0x80, 0x00, 0xff, 0x01, 0x00, 0x10, 0x00, 0xff, 0xfd, 0xd8, 0x00, 0xdc, 0x00, 0xdb, 0xff, + 0xdf, 0xff, + ], + }; + + [ArrayBuffer, SharedArrayBuffer].forEach(arrayBufferOrSharedArrayBuffer => { + Object.keys(octets).forEach(function (encoding) { + for (var len = 1; len <= 5; ++len) { + it( + "Streaming decode: " + encoding + ", " + len + " byte window (" + arrayBufferOrSharedArrayBuffer.name + ")", + () => { + var encoded = octets[encoding]; + + var out = ""; + var decoder = new 
TextDecoder(encoding); + for (var i = 0; i < encoded.length; i += len) { + var sub = []; + for (var j = i; j < encoded.length && j < i + len; ++j) { + sub.push(encoded[j]); + } + var uintArray = new Uint8Array(new arrayBufferOrSharedArrayBuffer(sub.length)); + uintArray.set(sub); + out += decoder.decode(uintArray, { stream: true }); + } + out += decoder.decode(); + expect(out).toEqual(string); + }, + ); + } + }); + + it(`Streaming decode: UTF-8 chunk tests (${arrayBufferOrSharedArrayBuffer.name})`, () => { + function bytes(byteArray) { + const view = new Uint8Array(new arrayBufferOrSharedArrayBuffer(byteArray.length)); + view.set(byteArray); + return view; + } + + const decoder = new TextDecoder(); + + expect(decoder.decode(bytes([0xc1]), { stream: true })).toEqual("\uFFFD"); + expect(decoder.decode()).toEqual(""); + + expect(decoder.decode(bytes([0xf5]), { stream: true })).toEqual("\uFFFD"); + expect(decoder.decode()).toEqual(""); + + expect(decoder.decode(bytes([0xe0, 0x41]), { stream: true })).toEqual("\uFFFDA"); + expect(decoder.decode(bytes([0x42]))).toEqual("B"); + + expect(decoder.decode(bytes([0xe0, 0x80]), { stream: true })).toEqual("\uFFFD\uFFFD"); + expect(decoder.decode(bytes([0x80]))).toEqual("\uFFFD"); + + expect(decoder.decode(bytes([0xed, 0xa0]), { stream: true })).toEqual("\uFFFD\uFFFD"); + expect(decoder.decode(bytes([0x80]))).toEqual("\uFFFD"); + + expect(decoder.decode(bytes([0xf0, 0x41]), { stream: true })).toEqual("\uFFFDA"); + expect(decoder.decode(bytes([0x42]), { stream: true })).toEqual("B"); + expect(decoder.decode(bytes([0x43]))).toEqual("C"); + + expect(decoder.decode(bytes([0xf0, 0x80]), { stream: true })).toEqual("\uFFFD\uFFFD"); + expect(decoder.decode(bytes([0x80]), { stream: true })).toEqual("\uFFFD"); + expect(decoder.decode(bytes([0x80]))).toEqual("\uFFFD"); + + expect(decoder.decode(bytes([0xf4, 0xa0]), { stream: true })).toEqual("\uFFFD\uFFFD"); + expect(decoder.decode(bytes([0x80]), { stream: true })).toEqual("\uFFFD"); + 
expect(decoder.decode(bytes([0x80]))).toEqual("\uFFFD"); + + expect(decoder.decode(bytes([0xf0, 0x90, 0x41]), { stream: true })).toEqual("\uFFFDA"); + expect(decoder.decode(bytes([0x42]))).toEqual("B"); + + // 4-byte UTF-8 sequences always correspond to non-BMP characters. Here + // we make sure that, although the first 3 bytes are enough to emit the + // lead surrogate, it only gets emitted when the fourth byte is read. + expect(decoder.decode(bytes([0xf0, 0x9f, 0x92]), { stream: true })).toEqual(""); + expect(decoder.decode(bytes([0xa9]))).toEqual("\u{1F4A9}"); + }); + }); + } +}); diff --git a/test/js/web/encoding/text-encoder-stream.test.ts b/test/js/web/encoding/text-encoder-stream.test.ts new file mode 100644 index 00000000000000..217e2d91a21e50 --- /dev/null +++ b/test/js/web/encoding/text-encoder-stream.test.ts @@ -0,0 +1,144 @@ +import { test, expect } from "bun:test"; +import { readableStreamFromArray } from "harness"; + +// META: global=window,worker +// META: script=resources/readable-stream-from-array.js +// META: script=resources/readable-stream-to-array.js + +const inputString = "I \u{1F499} streams"; +const expectedOutputBytes = [0x49, 0x20, 0xf0, 0x9f, 0x92, 0x99, 0x20, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x73]; +// This is a character that must be represented in two code units in a string, +// ie. it is not in the Basic Multilingual Plane. +const astralCharacter = "\u{1F499}"; // BLUE HEART +const astralCharacterEncoded = [0xf0, 0x9f, 0x92, 0x99]; +const leading = astralCharacter[0]; +const trailing = astralCharacter[1]; +const replacementEncoded = [0xef, 0xbf, 0xbd]; + +// These tests assume that the implementation correctly classifies leading and +// trailing surrogates and treats all the code units in each set equivalently. 
// Table of encoder scenarios. For every entry, `input` is the sequence of
// string chunks written to the stream and `output` is the sequence of byte
// chunks expected to be read back; `description` becomes the test title.
const testCases = [
  {
    description: "encoding one string of UTF-8 should give one complete chunk",
    input: [inputString],
    output: [expectedOutputBytes],
  },
  {
    description: "a character split between chunks should be correctly encoded",
    input: [leading, trailing],
    output: [astralCharacterEncoded],
  },
  {
    description: "a character following one split between chunks should be correctly encoded",
    input: [leading, `${trailing}${astralCharacter}`],
    output: [[...astralCharacterEncoded, ...astralCharacterEncoded]],
  },
  {
    description: "two consecutive astral characters each split down the middle should be correctly reassembled",
    input: [leading, `${trailing}${leading}`, trailing],
    output: [astralCharacterEncoded, astralCharacterEncoded],
  },
  {
    description:
      "two consecutive astral characters each split down the middle with an invalid surrogate in the middle should be correctly encoded",
    input: [leading, `${trailing}${leading}${leading}`, trailing],
    output: [[...astralCharacterEncoded, ...replacementEncoded], astralCharacterEncoded],
  },
  {
    description: "a stream ending in a leading surrogate should emit a replacement character as a final chunk",
    input: [leading],
    output: [replacementEncoded],
  },
  {
    description:
      "an unmatched surrogate at the end of a chunk followed by an astral character in the next chunk should be replaced with the replacement character at the start of the next output chunk",
    input: [leading, astralCharacter],
    output: [[...replacementEncoded, ...astralCharacterEncoded]],
  },
  {
    description:
      "an unmatched surrogate at the end of a chunk followed by an ascii character in the next chunk should be replaced with the replacement character at the start of the next output chunk",
    input: [leading, "A"],
    output: [[...replacementEncoded, 65]],
  },
  {
    description:
      "an unmatched surrogate at the end of a chunk followed by a plane 1 character split into two chunks should result in the encoded plane 1 character appearing in the last output chunk",
    input: [leading, leading, trailing],
    output: [replacementEncoded, astralCharacterEncoded],
  },
  {
    description: "two leading chunks should result in two replacement characters",
    input: [leading, leading],
    output: [replacementEncoded, replacementEncoded],
  },
  {
    description: "a non-terminal unpaired leading surrogate should immediately be replaced",
    input: [`${leading}${leading}`, trailing],
    output: [replacementEncoded, astralCharacterEncoded],
  },
  {
    description: "a terminal unpaired trailing surrogate should immediately be replaced",
    input: [trailing, astralCharacter],
    output: [replacementEncoded, astralCharacterEncoded],
  },
  {
    description: "a leading surrogate chunk should be carried past empty chunks",
    input: [leading, "", trailing],
    output: [astralCharacterEncoded],
  },
  {
    description: "a leading surrogate chunk should error when it is clear it didn't form a pair",
    input: [leading, ""],
    output: [replacementEncoded],
  },
  {
    description: "an empty string should result in no output chunk",
    input: [""],
    output: [],
  },
  {
    description: "a leading empty chunk should be ignored",
    input: ["", inputString],
    output: [expectedOutputBytes],
  },
  {
    description: "a trailing empty chunk should be ignored",
    input: [inputString, ""],
    output: [expectedOutputBytes],
  },
  {
    description: "a plain ASCII chunk should be converted",
    input: ["A"],
    output: [[65]],
  },
  {
    description: "characters in the ISO-8859-1 range should be encoded correctly",
    input: ["\xff"],
    output: [[195, 191]],
  },
];

// Register one test per table entry: pipe the input chunks through a fresh
// TextEncoderStream, collect everything it emits, then compare chunk count,
// per-chunk length, and every individual byte against the expectation.
testCases.forEach(({ description, input, output }) => {
  test(description, async () => {
    const encodedStream = readableStreamFromArray(input).pipeThrough(new TextEncoderStream());
    const actualChunks = await Bun.readableStreamToArray(encodedStream);

    expect(actualChunks.length, "number of chunks should match").toBe(output.length);

    for (const [chunkIndex, expectedBytes] of output.entries()) {
      const actualBytes = actualChunks[chunkIndex];
      expect(actualBytes.length).toBe(expectedBytes.length);

      for (const [byteIndex, expectedByte] of expectedBytes.entries()) {
        expect(actualBytes[byteIndex]).toBe(expectedByte);
      }
    }
  });
});