diff --git a/src/bun.js/node/node_fs.zig b/src/bun.js/node/node_fs.zig index 99a497e48e8a81..94d487b5e1bfef 100644 --- a/src/bun.js/node/node_fs.zig +++ b/src/bun.js/node/node_fs.zig @@ -3964,6 +3964,7 @@ const Return = struct { pub const ReadFile = StringOrBuffer; pub const ReadFileWithOptions = union(enum) { string: string, + transcoded_string: bun.String, buffer: JSC.Node.Buffer, null_terminated: [:0]const u8, }; @@ -5524,6 +5525,19 @@ pub const NodeFS = struct { .buffer = ret.result.buffer, }, }, + .transcoded_string => |str| { + if (str.tag == .Dead) { + return .{ .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path) }; + } + + return .{ + .result = .{ + .string = .{ + .underlying = str, + }, + }, + }; + }, .string => brk: { const str = bun.SliceWithUnderlyingString.transcodeFromOwnedSlice(@constCast(ret.result.string), args.encoding); @@ -5538,7 +5552,7 @@ pub const NodeFS = struct { }; } - pub fn readFileWithOptions(this: *NodeFS, args: Arguments.ReadFile, comptime _: Flavor, comptime string_type: StringType) Maybe(Return.ReadFileWithOptions) { + pub fn readFileWithOptions(this: *NodeFS, args: Arguments.ReadFile, comptime flavor: Flavor, comptime string_type: StringType) Maybe(Return.ReadFileWithOptions) { var path: [:0]const u8 = undefined; const fd_maybe_windows: FileDescriptor = switch (args.path) { .path => brk: { @@ -5602,6 +5616,107 @@ pub const NodeFS = struct { _ = Syscall.close(fd); } + // Only used in DOMFormData + if (args.offset > 0) { + _ = Syscall.setFileOffset(fd, args.offset); + } + + var did_succeed = false; + var total: usize = 0; + var async_stack_buffer: [if (flavor == .sync) 0 else 256 * 1024]u8 = undefined; + + // --- Optimization: attempt to read up to 256 KB before calling stat() + // If we manage to read the entire file, we don't need to call stat() at all. + // This will make it slightly slower to read e.g. 512 KB files, but usually the OS won't return a full 512 KB in one read anyway. + const temporary_read_buffer_before_stat_call = brk: { + const temporary_read_buffer = temporary_read_buffer: { + var temporary_read_buffer: []u8 = &async_stack_buffer; + + if (comptime flavor == .sync) { + if (this.vm) |vm| { + temporary_read_buffer = vm.rareData().pipeReadBuffer(); + } + } + + var available = temporary_read_buffer; + while (available.len > 0) { + switch (Syscall.read(fd, available)) { + .err => |err| return .{ + .err = err, + }, + .result => |amt| { + if (amt == 0) { + did_succeed = true; + break; + } + total += amt; + available = available[amt..]; + }, + } + } + break :temporary_read_buffer temporary_read_buffer[0..total]; + }; + + if (did_succeed) { + switch (args.encoding) { + .buffer => { + if (comptime flavor == .sync and string_type == .default) { + if (this.vm) |vm| { + // Attempt to create the buffer in JSC's heap. + // This avoids creating a WastefulTypedArray. + const array_buffer = JSC.ArrayBuffer.createBuffer(vm.global, temporary_read_buffer); + array_buffer.ensureStillAlive(); + return .{ + .result = .{ + .buffer = JSC.MarkedArrayBuffer{ + .buffer = array_buffer.asArrayBuffer(vm.global) orelse { + // This case shouldn't really happen. + return .{ + .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path), + }; + }, + }, + }, + }; + } + } + + return .{ + .result = .{ + .buffer = Buffer.fromBytes( + bun.default_allocator.dupe(u8, temporary_read_buffer) catch return .{ + .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path), + }, + bun.default_allocator, + .Uint8Array, + ), + }, + }; + }, + else => { + if (comptime string_type == .default) { + return .{ + .result = .{ + .transcoded_string = JSC.WebCore.Encoder.toWTFString(temporary_read_buffer, args.encoding), + }, + }; + } else { + return .{ + .result = .{ + .null_terminated = bun.default_allocator.dupeZ(u8, temporary_read_buffer) catch return .{ + .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path), + }, + }, + }; + } + }, + } + } + + break :brk temporary_read_buffer; + }; + // ---------------------------- + const stat_ = switch (Syscall.fstat(fd)) { .err => |err| return .{ .err = err, @@ -5609,10 +5724,6 @@ pub const NodeFS = struct { .result => |stat_| stat_, }; - // Only used in DOMFormData - if (args.offset > 0) { - _ = Syscall.setFileOffset(fd, args.offset); - } // For certain files, the size might be 0 but the file might still have contents. // https://github.com/oven-sh/bun/issues/1220 const max_size = args.max_size orelse std.math.maxInt(JSC.WebCore.Blob.SizeType); @@ -5626,6 +5737,7 @@ pub const NodeFS = struct { // Only used in DOMFormData max_size, ), + @as(i64, @intCast(total)), 0, ), ) + @intFromBool(comptime string_type == .null_terminated); @@ -5639,14 +5751,23 @@ pub const NodeFS = struct { } } - var did_succeed = false; var buf = std.ArrayList(u8).init(bun.default_allocator); defer if (!did_succeed) buf.clearAndFree(); - buf.ensureTotalCapacityPrecise(size + 16) catch return .{ + buf.ensureTotalCapacityPrecise( + @min( + @max(temporary_read_buffer_before_stat_call.len, size) + 16, + max_size, + 1024 * 1024 * 1024 * 8, + ), + ) catch return .{ .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path), }; + if (temporary_read_buffer_before_stat_call.len > 0) { + buf.appendSlice(temporary_read_buffer_before_stat_call) catch return .{ + .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path), + }; + } buf.expandToCapacity(); - var total: usize = 0; while (total < size) { switch (Syscall.read(fd, buf.items.ptr[total..@min(buf.capacity, max_size)])) { diff --git a/src/bun.js/node/node_fs_binding.zig b/src/bun.js/node/node_fs_binding.zig index c0dc296e5d45e7..927e40c43abb05 100644 --- a/src/bun.js/node/node_fs_binding.zig +++ b/src/bun.js/node/node_fs_binding.zig @@ -261,8 +261,7 @@ pub fn createBinding(globalObject: *JSC.JSGlobalObject) JSC.JSValue { const module = NodeJSFS.new(.{}); const vm = globalObject.bunVM(); - if (vm.standalone_module_graph != null) - module.node_fs.vm = vm; + module.node_fs.vm = vm; return module.toJS(globalObject); } diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index 9dffa73e92a0ff..8b8e8af0175971 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -1259,6 +1259,71 @@ pub const Encoder = struct { } } + /// Assumes `input` is not owned memory. + /// + /// Can be run on non-JavaScript threads. + /// + /// This is like toString(), but it returns a WTFString instead of a JSString*. + pub fn toWTFString(input: []const u8, encoding: JSC.Node.Encoding) bun.String { + if (input.len == 0) + return bun.String.empty; + + switch (encoding) { + .ascii => { + const str, const chars = bun.String.createUninitialized(.latin1, input.len); + strings.copyLatin1IntoASCII(chars, input); + return str; + }, + .latin1 => { + const str, const chars = bun.String.createUninitialized(.latin1, input.len); + @memcpy(chars, input); + return str; + }, + .buffer, .utf8 => { + const converted = strings.toUTF16Alloc(bun.default_allocator, input, false, false) catch return bun.String.dead; + if (converted) |utf16| { + return bun.String.createExternalGloballyAllocated(.utf16, utf16); + } + + // If we get here, it means we can safely assume the string is 100% ASCII characters + // For this, we rely on WebKit to manage the memory. + return bun.String.createLatin1(input); + }, + .ucs2, .utf16le => { + // Avoid incomplete characters + if (input.len / 2 == 0) return bun.String.empty; + + const output, const chars = bun.String.createUninitialized(.utf16, input.len / 2); + var output_bytes = std.mem.sliceAsBytes(chars); + output_bytes[output_bytes.len - 1] = 0; + + @memcpy(output_bytes, input[0..output_bytes.len]); + return output; + }, + + .hex => { + const str, const chars = bun.String.createUninitialized(.latin1, input.len * 2); + + const wrote = strings.encodeBytesToHex(chars, input); + bun.assert(wrote == chars.len); + return str; + }, + + .base64url => { + const out, const chars = bun.String.createUninitialized(.latin1, bun.base64.urlSafeEncodeLen(input)); + _ = bun.base64.encodeURLSafe(chars, input); + return out; + }, + + .base64 => { + const to_len = bun.base64.encodeLen(input); + const to = bun.default_allocator.alloc(u8, to_len) catch return bun.String.dead; + const wrote = bun.base64.encode(to, input); + return bun.String.createExternalGloballyAllocated(.latin1, to[0..wrote]); + }, + } + } + pub fn writeU8(input: [*]const u8, len: usize, to_ptr: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) !usize { if (len == 0 or to_len == 0) return 0; diff --git a/test/js/node/fs/fs-oom.test.ts b/test/js/node/fs/fs-oom.test.ts index c45728be5219a4..a859dc6a89c918 100644 --- a/test/js/node/fs/fs-oom.test.ts +++ b/test/js/node/fs/fs-oom.test.ts @@ -1,5 +1,5 @@ import { memfd_create, setSyntheticAllocationLimitForTesting } from "bun:internal-for-testing"; -import { expect, test } from "bun:test"; +import { describe, expect, test } from "bun:test"; import { closeSync, readFileSync, writeSync } from "fs"; import { isLinux, isPosix } from "harness"; setSyntheticAllocationLimitForTesting(128 * 1024 * 1024); @@ -22,26 +22,27 @@ if (isPosix) { // memfd is linux only. if (isLinux) { - test("fs.readFileSync large file show OOM without crashing the process.", () => { - const memfd = memfd_create(1024 * 1024 * 256 + 1); - { - let buf = new Uint8Array(32 * 1024 * 1024); - for (let i = 0; i < 1024 * 1024 * 256 + 1; i += buf.byteLength) { - writeSync(memfd, buf, i, buf.byteLength); - } - } - setSyntheticAllocationLimitForTesting(128 * 1024 * 1024); - - try { - expect(() => readFileSync(memfd)).toThrow("Out of memory"); - Bun.gc(true); - expect(() => readFileSync(memfd, "utf8")).toThrow("Out of memory"); + describe("fs.readFileSync large file show OOM without crashing the process.", () => { + test.each(["buffer", "utf8", "ucs2", "latin1"] as const)("%s encoding", encoding => { + const memfd = memfd_create(1024 * 1024 * 16 + 1); + (function (memfd) { + let buf = new Uint8Array(8 * 1024 * 1024); + buf.fill(42); + for (let i = 0; i < 1024 * 1024 * 16 + 1; i += buf.byteLength) { + writeSync(memfd, buf, i, buf.byteLength); + } + })(memfd); Bun.gc(true); - expect(() => readFileSync(memfd, "latin1")).toThrow("Out of memory"); - Bun.gc(true); - // it is difficult in CI to test the other encodings. - } finally { - closeSync(memfd); - } + setSyntheticAllocationLimitForTesting(2 * 1024 * 1024); + + try { + expect(() => (encoding === "buffer" ? readFileSync(memfd) : readFileSync(memfd, encoding))).toThrow( + "Out of memory", + ); + } finally { + Bun.gc(true); + closeSync(memfd); + } + }); }); }