From 5bd344281f0c6e58cda47f2c868e2830aca5d80f Mon Sep 17 00:00:00 2001 From: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> Date: Thu, 15 Aug 2024 03:35:58 -0700 Subject: [PATCH] fix(TextEncoder): domjit crash in `encode` (#13320) Co-authored-by: Jarred Sumner --- src/bun.js/bindings/Uint8Array.cpp | 4 +- src/bun.js/bindings/ZigGlobalObject.cpp | 6 +- src/bun.js/bindings/bindings.cpp | 2 +- src/bun.js/bindings/bindings.zig | 9 +++ src/bun.js/webcore/encoding.classes.ts | 5 ++ src/bun.js/webcore/encoding.zig | 13 ++-- src/codegen/generate-classes.ts | 62 ++++++++++++++++++- src/js/builtins/TextEncoderStream.ts | 7 +-- test/js/bun/jsc/domjit.test.ts | 5 ++ .../web/encoding/text-encoder-stream.test.ts | 1 + 10 files changed, 97 insertions(+), 17 deletions(-) diff --git a/src/bun.js/bindings/Uint8Array.cpp b/src/bun.js/bindings/Uint8Array.cpp index c1e3708b077afd..baa3c73003e7dc 100644 --- a/src/bun.js/bindings/Uint8Array.cpp +++ b/src/bun.js/bindings/Uint8Array.cpp @@ -18,9 +18,9 @@ extern "C" JSC::EncodedJSValue JSUint8Array__fromDefaultAllocator(JSC::JSGlobalO mi_free(p); })); - uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, lexicalGlobalObject->m_typedArrayUint8.get(lexicalGlobalObject), WTFMove(buffer), 0, length); + uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, lexicalGlobalObject->typedArrayStructure(JSC::TypeUint8, false), WTFMove(buffer), 0, length); } else { - uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, lexicalGlobalObject->m_typedArrayUint8.get(lexicalGlobalObject), 0); + uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, lexicalGlobalObject->typedArrayStructure(JSC::TypeUint8, false), 0); } return JSC::JSValue::encode(uint8Array); diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp index 6ee84c7dfe3f90..8301bc18c475a3 100644 --- a/src/bun.js/bindings/ZigGlobalObject.cpp +++ b/src/bun.js/bindings/ZigGlobalObject.cpp @@ -1719,10 +1719,12 @@ extern "C" JSC__JSValue Bun__allocUint8ArrayForCopy(JSC::JSGlobalObject* globalO extern "C" JSC__JSValue Bun__createUint8ArrayForCopy(JSC::JSGlobalObject* globalObject, const void* ptr, size_t len, bool isBuffer) { - auto scope = DECLARE_THROW_SCOPE(globalObject->vm()); + VM& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + JSC::JSUint8Array* array = JSC::JSUint8Array::createUninitialized( globalObject, - isBuffer ? reinterpret_cast(globalObject)->JSBufferSubclassStructure() : globalObject->m_typedArrayUint8.get(globalObject), + isBuffer ? reinterpret_cast(globalObject)->JSBufferSubclassStructure() : globalObject->typedArrayStructure(TypeUint8, false), len); if (UNLIKELY(!array)) { diff --git a/src/bun.js/bindings/bindings.cpp b/src/bun.js/bindings/bindings.cpp index c4cfd25e1fea22..acf15e69056b77 100644 --- a/src/bun.js/bindings/bindings.cpp +++ b/src/bun.js/bindings/bindings.cpp @@ -2986,7 +2986,7 @@ JSC__JSValue ZigString__toExternalValue(const ZigString* arg0, JSC__JSGlobalObje VirtualMachine* JSC__JSGlobalObject__bunVM(JSC__JSGlobalObject* arg0) { - return reinterpret_cast(reinterpret_cast(arg0)->bunVM()); + return reinterpret_cast(WebCore::clientData(arg0->vm())->bunVM); } JSC__JSValue ZigString__toValueGC(const ZigString* arg0, JSC__JSGlobalObject* arg1) diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig index 2dcd0bf3eb939f..0e977b297065f9 100644 --- a/src/bun.js/bindings/bindings.zig +++ b/src/bun.js/bindings/bindings.zig @@ -1711,6 +1711,15 @@ pub const JSUint8Array = opaque { pub fn fromBytes(globalThis: *JSGlobalObject, bytes: []u8) JSC.JSValue { return JSUint8Array__fromDefaultAllocator(globalThis, bytes.ptr, bytes.len); } + + extern fn Bun__createUint8ArrayForCopy(*JSC.JSGlobalObject, ptr: ?*const anyopaque, len: usize, buffer: bool) JSValue; + pub fn fromBytesCopy(globalThis: *JSGlobalObject, bytes: []const u8) JSValue { + return Bun__createUint8ArrayForCopy(globalThis, bytes.ptr, bytes.len, false); + } + + pub fn createEmpty(globalThis: *JSGlobalObject) JSValue { + return Bun__createUint8ArrayForCopy(globalThis, null, 0, false); + } }; pub const JSCell = extern struct { diff --git a/src/bun.js/webcore/encoding.classes.ts b/src/bun.js/webcore/encoding.classes.ts index 7b5114ebd815ce..b26078ccc1fa34 100644 --- a/src/bun.js/webcore/encoding.classes.ts +++ b/src/bun.js/webcore/encoding.classes.ts @@ -51,6 +51,11 @@ export default [ flush: { fn: "flush", length: 0, + + DOMJIT: { + returns: "JSUint8Array", + args: [], + }, }, }, }), diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index f069f7326f2fbb..6ac268370913c9 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -17,6 +17,7 @@ const strings = bun.strings; const string = bun.string; const FeatureFlags = bun.FeatureFlags; const ArrayBuffer = @import("../base.zig").ArrayBuffer; +const JSUint8Array = JSC.JSUint8Array; const Properties = @import("../base.zig").Properties; const castObj = @import("../base.zig").castObj; @@ -450,7 +451,7 @@ pub const TextEncoderStreamEncoder = struct { fn encodeLatin1(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, input: []const u8) JSValue { log("encodeLatin1: \"{s}\"", .{input}); - if (input.len == 0) return .undefined; + if (input.len == 0) return JSUint8Array.createEmpty(globalObject); const prepend_replacement_len: usize = prepend_replacement: { if (this.pending_lead_surrogate != null) { @@ -509,7 +510,7 @@ pub const TextEncoderStreamEncoder = struct { fn encodeUTF16(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject, input: []const u16) JSValue { log("encodeUTF16: \"{}\"", .{bun.fmt.utf16(input)}); - if (input.len == 0) return .undefined; + if (input.len == 0) return JSUint8Array.createEmpty(globalObject); const Prepend = struct { bytes: [4]u8, @@ -538,7 +539,7 @@ pub const TextEncoderStreamEncoder = struct { remain = remain[1..]; if (remain.len == 0) { - return ArrayBuffer.createBuffer( + return JSUint8Array.fromBytesCopy( globalObject, sequence[0..converted.utf8Width()], ); @@ -579,7 +580,7 @@ pub const TextEncoderStreamEncoder = struct { if (lead_surrogate) |pending_lead| { this.pending_lead_surrogate = pending_lead; - if (buf.items.len == 0) return .undefined; + if (buf.items.len == 0) return JSUint8Array.createEmpty(globalObject); } return JSC.JSUint8Array.fromBytes(globalObject, buf.items); @@ -601,9 +602,9 @@ pub const TextEncoderStreamEncoder = struct { fn flushBody(this: *TextEncoderStreamEncoder, globalObject: *JSGlobalObject) JSValue { return if (this.pending_lead_surrogate == null) - .undefined + JSUint8Array.createEmpty(globalObject) else - JSC.ArrayBuffer.createBuffer(globalObject, &.{ 0xef, 0xbf, 0xbd }); + JSUint8Array.fromBytesCopy(globalObject, &.{ 0xef, 0xbf, 0xbd }); } }; diff --git a/src/codegen/generate-classes.ts b/src/codegen/generate-classes.ts index 806f280bedb099..d93b26a62b75f5 100644 --- a/src/codegen/generate-classes.ts +++ b/src/codegen/generate-classes.ts @@ -132,7 +132,7 @@ static const JSC::DOMJIT::Signature DOMJITSignatureFor${fnName}(${DOMJITName(fnN ); } -function DOMJITFunctionDefinition(jsClassName, fnName, symName, { args }) { +function DOMJITFunctionDefinition(jsClassName, fnName, symName, { args }, fn) { const argNames = args.map((arg, i) => `${argTypeName(arg)} arg${i}`); const formattedArgs = argNames.length > 0 ? `, ${argNames.join(", ")}` : ""; const retArgs = argNames.length > 0 ? `, ${args.map((b, i) => "arg" + i).join(", ")}` : ""; @@ -147,6 +147,24 @@ JSC_DEFINE_JIT_OPERATION(${DOMJITName( CallFrame* callFrame = DECLARE_CALL_FRAME(vm); IGNORE_WARNINGS_END JSC::JITOperationPrologueCallFrameTracer tracer(vm, callFrame); +#ifdef BUN_DEBUG + ${jsClassName}* wrapper = reinterpret_cast<${jsClassName}*>(thisValue); + JSC::EncodedJSValue result = ${DOMJITName(symName)}(wrapper->wrapped(), lexicalGlobalObject${retArgs}); + JSValue decoded = JSValue::decode(result); + if (wrapper->m_${fn}_expectedResultType) { + if (decoded.isCell() && !decoded.isEmpty()) { + ASSERT_WITH_MESSAGE(wrapper->m_${fn}_expectedResultType.value().has_value(), "DOMJIT function return type changed!"); + ASSERT_WITH_MESSAGE(wrapper->m_${fn}_expectedResultType.value().value() == decoded.asCell()->type(), "DOMJIT function return type changed!"); + } else { + ASSERT_WITH_MESSAGE(!wrapper->m_${fn}_expectedResultType.value().has_value(), "DOMJIT function return type changed!"); + } + } else if (!decoded.isEmpty()) { + wrapper->m_${fn}_expectedResultType = decoded.isCell() + ? std::optional(decoded.asCell()->type()) + : std::optional(std::nullopt); + } + return { result }; +#endif return {${DOMJITName(symName)}(reinterpret_cast<${jsClassName}*>(thisValue)->wrapped(), lexicalGlobalObject${retArgs})}; } `.trim(); @@ -853,6 +871,7 @@ function renderDecls(symbolName, typeName, proto, supportsObjectCreate = false) symbolName(typeName, name), symbolName(typeName, proto[name].fn), proto[name].DOMJIT, + proto[name].fn, ), ); } @@ -1089,6 +1108,7 @@ JSC_DEFINE_CUSTOM_SETTER(${symbolName(typeName, name)}SetterWrap, (JSGlobalObjec } if ("fn" in proto[name]) { + const fn = proto[name].fn; rows.push(` JSC_DEFINE_HOST_FUNCTION(${symbolName(typeName, name)}Callback, (JSGlobalObject * lexicalGlobalObject, CallFrame* callFrame)) { @@ -1114,10 +1134,29 @@ JSC_DEFINE_HOST_FUNCTION(${symbolName(typeName, name)}Callback, (JSGlobalObject lastFileName = fileName; } - JSC::EncodedJSValue result = ${symbolName(typeName, proto[name].fn)}(thisObject->wrapped(), lexicalGlobalObject, callFrame${proto[name].passThis ? ", JSValue::encode(thisObject)" : ""}); + JSC::EncodedJSValue result = ${symbolName(typeName, fn)}(thisObject->wrapped(), lexicalGlobalObject, callFrame, callFrame${proto[name].passThis ? ", JSValue::encode(thisObject)" : ""}); ASSERT_WITH_MESSAGE(!JSValue::decode(result).isEmpty() or DECLARE_CATCH_SCOPE(vm).exception() != 0, \"${typeName}.${proto[name].fn} returned an empty value without an exception\"); + ${ + !proto[name].DOMJIT + ? "" + : ` + JSValue decoded = JSValue::decode(result); + if (thisObject->m_${fn}_expectedResultType) { + if (decoded.isCell() && !decoded.isEmpty()) { + ASSERT_WITH_MESSAGE(thisObject->m_${fn}_expectedResultType.value().has_value(), "DOMJIT function return type changed!"); + ASSERT_WITH_MESSAGE(thisObject->m_${fn}_expectedResultType.value().value() == decoded.asCell()->type(), "DOMJIT function return type changed!"); + } else { + ASSERT_WITH_MESSAGE(!thisObject->m_${fn}_expectedResultType.value().has_value(), "DOMJIT function return type changed!"); + } + } else if (!decoded.isEmpty()) { + thisObject->m_${fn}_expectedResultType = decoded.isCell() + ? std::optional(decoded.asCell()->type()) + : std::optional(std::nullopt); + }` + } + return result; #endif @@ -1265,6 +1304,8 @@ function generateClassHeader(typeName, obj: ClassDefinition) { }) .join("\n")} + ${domJITTypeCheckFields(proto, klass)} + ${weakOwner} ${DECLARE_VISIT_CHILDREN} @@ -1276,6 +1317,23 @@ function generateClassHeader(typeName, obj: ClassDefinition) { `.trim(); } +function domJITTypeCheckFields(proto, klass) { + var output = "#ifdef BUN_DEBUG\n"; + for (const name in proto) { + const { DOMJIT, fn } = proto[name]; + if (!DOMJIT) continue; + output += `std::optional> m_${fn}_expectedResultType = std::nullopt;\n`; + } + + for (const name in klass) { + const { DOMJIT, fn } = klass[name]; + if (!DOMJIT) continue; + output += `std::optional> m_${fn}_expectedResultType = std::nullopt;\n`; + } + output += "#endif\n"; + return output; +} + function generateClassImpl(typeName, obj: ClassDefinition) { const { klass: fields, diff --git a/src/js/builtins/TextEncoderStream.ts b/src/js/builtins/TextEncoderStream.ts index daf38e83413392..4aa0c895dd1c9f 100644 --- a/src/js/builtins/TextEncoderStream.ts +++ b/src/js/builtins/TextEncoderStream.ts @@ -29,13 +29,12 @@ export function initializeTextEncoderStream() { }; const transformAlgorithm = chunk => { const encoder = $getByIdDirectPrivate(this, "textEncoderStreamEncoder"); - let buffer; try { - buffer = encoder.encode(chunk); + var buffer = encoder.encode(chunk); } catch (e) { return Promise.$reject(e); } - if (buffer) { + if (buffer.length) { const transformStream = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); const controller = $getByIdDirectPrivate(transformStream, "controller"); $transformStreamDefaultControllerEnqueue(controller, buffer); @@ -45,7 +44,7 @@ export function initializeTextEncoderStream() { const flushAlgorithm = () => { const encoder = $getByIdDirectPrivate(this, "textEncoderStreamEncoder"); const buffer = encoder.flush(); - if (buffer) { + if (buffer.length) { const transformStream = $getByIdDirectPrivate(this, "textEncoderStreamTransform"); const controller = $getByIdDirectPrivate(transformStream, "controller"); $transformStreamDefaultControllerEnqueue(controller, buffer); diff --git a/test/js/bun/jsc/domjit.test.ts b/test/js/bun/jsc/domjit.test.ts index cb090b578d2d55..6e396f0b896ecc 100644 --- a/test/js/bun/jsc/domjit.test.ts +++ b/test/js/bun/jsc/domjit.test.ts @@ -106,6 +106,7 @@ describe("DOMJIT", () => { describe("in NodeVM", () => { const code = ` const buf = new Uint8Array(4); + const encoder = new TextEncoder(); for (let iter of [100000]) { for (let i = 0; i < iter; i++) { performance.now(); @@ -113,6 +114,10 @@ describe("DOMJIT", () => { for (let i = 0; i < iter; i++) { new TextEncoder().encode("test"); } + const str = "a".repeat(1030); + for (let i = 0; i < 1000000; i++) { + const result = encoder.encode(str); + } for (let i = 0; i < iter; i++) { new TextEncoder().encodeInto("test", buf); } diff --git a/test/js/web/encoding/text-encoder-stream.test.ts b/test/js/web/encoding/text-encoder-stream.test.ts index 217e2d91a21e50..be51935108f607 100644 --- a/test/js/web/encoding/text-encoder-stream.test.ts +++ b/test/js/web/encoding/text-encoder-stream.test.ts @@ -135,6 +135,7 @@ for (const { input, output, description } of testCases) { const chunkArray = await Bun.readableStreamToArray(outputStream); expect(chunkArray.length, "number of chunks should match").toBe(output.length); for (let i = 0; i < output.length; ++i) { + expect(chunkArray[i].constructor).toBe(Uint8Array); expect(chunkArray[i].length).toBe(output[i].length); for (let j = 0; j < output[i].length; ++j) { expect(chunkArray[i][j]).toBe(output[i][j]);