Skip to content

Commit

Permalink
Add micro-optimization to fs.readFile (#15076)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner authored Nov 11, 2024
1 parent d879f43 commit 781a392
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 31 deletions.
137 changes: 129 additions & 8 deletions src/bun.js/node/node_fs.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3964,6 +3964,7 @@ const Return = struct {
pub const ReadFile = StringOrBuffer;
pub const ReadFileWithOptions = union(enum) {
string: string,
transcoded_string: bun.String,
buffer: JSC.Node.Buffer,
null_terminated: [:0]const u8,
};
Expand Down Expand Up @@ -5524,6 +5525,19 @@ pub const NodeFS = struct {
.buffer = ret.result.buffer,
},
},
.transcoded_string => |str| {
if (str.tag == .Dead) {
return .{ .err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path) };
}

return .{
.result = .{
.string = .{
.underlying = str,
},
},
};
},
.string => brk: {
const str = bun.SliceWithUnderlyingString.transcodeFromOwnedSlice(@constCast(ret.result.string), args.encoding);

Expand All @@ -5538,7 +5552,7 @@ pub const NodeFS = struct {
};
}

pub fn readFileWithOptions(this: *NodeFS, args: Arguments.ReadFile, comptime _: Flavor, comptime string_type: StringType) Maybe(Return.ReadFileWithOptions) {
pub fn readFileWithOptions(this: *NodeFS, args: Arguments.ReadFile, comptime flavor: Flavor, comptime string_type: StringType) Maybe(Return.ReadFileWithOptions) {
var path: [:0]const u8 = undefined;
const fd_maybe_windows: FileDescriptor = switch (args.path) {
.path => brk: {
Expand Down Expand Up @@ -5602,17 +5616,114 @@ pub const NodeFS = struct {
_ = Syscall.close(fd);
}

// Only used in DOMFormData
if (args.offset > 0) {
_ = Syscall.setFileOffset(fd, args.offset);
}

var did_succeed = false;
var total: usize = 0;
var async_stack_buffer: [if (flavor == .sync) 0 else 256 * 1024]u8 = undefined;

// --- Optimization: attempt to read up to 256 KB before calling stat()
// If we manage to read the entire file, we don't need to call stat() at all.
// This will make it slightly slower to read e.g. 512 KB files, but usually the OS won't return a full 512 KB in one read anyway.
const temporary_read_buffer_before_stat_call = brk: {
const temporary_read_buffer = temporary_read_buffer: {
var temporary_read_buffer: []u8 = &async_stack_buffer;

if (comptime flavor == .sync) {
if (this.vm) |vm| {
temporary_read_buffer = vm.rareData().pipeReadBuffer();
}
}

var available = temporary_read_buffer;
while (available.len > 0) {
switch (Syscall.read(fd, available)) {
.err => |err| return .{
.err = err,
},
.result => |amt| {
if (amt == 0) {
did_succeed = true;
break;
}
total += amt;
available = available[amt..];
},
}
}
break :temporary_read_buffer temporary_read_buffer[0..total];
};

if (did_succeed) {
switch (args.encoding) {
.buffer => {
if (comptime flavor == .sync and string_type == .default) {
if (this.vm) |vm| {
// Attempt to create the buffer in JSC's heap.
// This avoids creating a WastefulTypedArray.
const array_buffer = JSC.ArrayBuffer.createBuffer(vm.global, temporary_read_buffer);
array_buffer.ensureStillAlive();
return .{
.result = .{
.buffer = JSC.MarkedArrayBuffer{
.buffer = array_buffer.asArrayBuffer(vm.global) orelse {
// This case shouldn't really happen.
return .{
.err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path),
};
},
},
},
};
}
}

return .{
.result = .{
.buffer = Buffer.fromBytes(
bun.default_allocator.dupe(u8, temporary_read_buffer) catch return .{
.err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path),
},
bun.default_allocator,
.Uint8Array,
),
},
};
},
else => {
if (comptime string_type == .default) {
return .{
.result = .{
.transcoded_string = JSC.WebCore.Encoder.toWTFString(temporary_read_buffer, args.encoding),
},
};
} else {
return .{
.result = .{
.null_terminated = bun.default_allocator.dupeZ(u8, temporary_read_buffer) catch return .{
.err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path),
},
},
};
}
},
}
}

break :brk temporary_read_buffer;
};
// ----------------------------

const stat_ = switch (Syscall.fstat(fd)) {
.err => |err| return .{
.err = err,
},
.result => |stat_| stat_,
};

// Only used in DOMFormData
if (args.offset > 0) {
_ = Syscall.setFileOffset(fd, args.offset);
}
// For certain files, the size might be 0 but the file might still have contents.
// https://github.com/oven-sh/bun/issues/1220
const max_size = args.max_size orelse std.math.maxInt(JSC.WebCore.Blob.SizeType);
Expand All @@ -5626,6 +5737,7 @@ pub const NodeFS = struct {
// Only used in DOMFormData
max_size,
),
@as(i64, @intCast(total)),
0,
),
) + @intFromBool(comptime string_type == .null_terminated);
Expand All @@ -5639,14 +5751,23 @@ pub const NodeFS = struct {
}
}

var did_succeed = false;
var buf = std.ArrayList(u8).init(bun.default_allocator);
defer if (!did_succeed) buf.clearAndFree();
buf.ensureTotalCapacityPrecise(size + 16) catch return .{
buf.ensureTotalCapacityPrecise(
@min(
@max(temporary_read_buffer_before_stat_call.len, size) + 16,
max_size,
1024 * 1024 * 1024 * 8,
),
) catch return .{
.err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path),
};
if (temporary_read_buffer_before_stat_call.len > 0) {
buf.appendSlice(temporary_read_buffer_before_stat_call) catch return .{
.err = Syscall.Error.fromCode(.NOMEM, .read).withPathLike(args.path),
};
}
buf.expandToCapacity();
var total: usize = 0;

while (total < size) {
switch (Syscall.read(fd, buf.items.ptr[total..@min(buf.capacity, max_size)])) {
Expand Down
3 changes: 1 addition & 2 deletions src/bun.js/node/node_fs_binding.zig
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,7 @@ pub fn createBinding(globalObject: *JSC.JSGlobalObject) JSC.JSValue {
const module = NodeJSFS.new(.{});

const vm = globalObject.bunVM();
if (vm.standalone_module_graph != null)
module.node_fs.vm = vm;
module.node_fs.vm = vm;

return module.toJS(globalObject);
}
Expand Down
65 changes: 65 additions & 0 deletions src/bun.js/webcore/encoding.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,71 @@ pub const Encoder = struct {
}
}

/// Assumes `input` is not owned memory.
///
/// Can be run on non-JavaScript threads.
///
/// This is like toString(), but it returns a WTFString instead of a JSString*.
pub fn toWTFString(input: []const u8, encoding: JSC.Node.Encoding) bun.String {
if (input.len == 0)
return bun.String.empty;

switch (encoding) {
.ascii => {
const str, const chars = bun.String.createUninitialized(.latin1, input.len);
strings.copyLatin1IntoASCII(chars, input);
return str;
},
.latin1 => {
const str, const chars = bun.String.createUninitialized(.latin1, input.len);
@memcpy(chars, input);
return str;
},
.buffer, .utf8 => {
const converted = strings.toUTF16Alloc(bun.default_allocator, input, false, false) catch return bun.String.dead;
if (converted) |utf16| {
return bun.String.createExternalGloballyAllocated(.utf16, utf16);
}

// If we get here, it means we can safely assume the string is 100% ASCII characters
// For this, we rely on WebKit to manage the memory.
return bun.String.createLatin1(input);
},
.ucs2, .utf16le => {
// Avoid incomplete characters
if (input.len / 2 == 0) return bun.String.empty;

const output, const chars = bun.String.createUninitialized(.utf16, input.len / 2);
var output_bytes = std.mem.sliceAsBytes(chars);
output_bytes[output_bytes.len - 1] = 0;

@memcpy(output_bytes, input[0..output_bytes.len]);
return output;
},

.hex => {
const str, const chars = bun.String.createUninitialized(.latin1, input.len * 2);

const wrote = strings.encodeBytesToHex(chars, input);
bun.assert(wrote == chars.len);
return str;
},

.base64url => {
const out, const chars = bun.String.createUninitialized(.latin1, bun.base64.urlSafeEncodeLen(input));
_ = bun.base64.encodeURLSafe(chars, input);
return out;
},

.base64 => {
const to_len = bun.base64.encodeLen(input);
const to = bun.default_allocator.alloc(u8, to_len) catch return bun.String.dead;
const wrote = bun.base64.encode(to, input);
return bun.String.createExternalGloballyAllocated(.latin1, to[0..wrote]);
},
}
}

pub fn writeU8(input: [*]const u8, len: usize, to_ptr: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) !usize {
if (len == 0 or to_len == 0)
return 0;
Expand Down
43 changes: 22 additions & 21 deletions test/js/node/fs/fs-oom.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { memfd_create, setSyntheticAllocationLimitForTesting } from "bun:internal-for-testing";
import { expect, test } from "bun:test";
import { describe, expect, test } from "bun:test";
import { closeSync, readFileSync, writeSync } from "fs";
import { isLinux, isPosix } from "harness";
setSyntheticAllocationLimitForTesting(128 * 1024 * 1024);
Expand All @@ -22,26 +22,27 @@ if (isPosix) {

// memfd is linux only.
if (isLinux) {
test("fs.readFileSync large file show OOM without crashing the process.", () => {
const memfd = memfd_create(1024 * 1024 * 256 + 1);
{
let buf = new Uint8Array(32 * 1024 * 1024);
for (let i = 0; i < 1024 * 1024 * 256 + 1; i += buf.byteLength) {
writeSync(memfd, buf, i, buf.byteLength);
}
}
setSyntheticAllocationLimitForTesting(128 * 1024 * 1024);

try {
expect(() => readFileSync(memfd)).toThrow("Out of memory");
Bun.gc(true);
expect(() => readFileSync(memfd, "utf8")).toThrow("Out of memory");
describe("fs.readFileSync large file show OOM without crashing the process.", () => {
test.each(["buffer", "utf8", "ucs2", "latin1"] as const)("%s encoding", encoding => {
const memfd = memfd_create(1024 * 1024 * 16 + 1);
(function (memfd) {
let buf = new Uint8Array(8 * 1024 * 1024);
buf.fill(42);
for (let i = 0; i < 1024 * 1024 * 16 + 1; i += buf.byteLength) {
writeSync(memfd, buf, i, buf.byteLength);
}
})(memfd);
Bun.gc(true);
expect(() => readFileSync(memfd, "latin1")).toThrow("Out of memory");
Bun.gc(true);
// it is difficult in CI to test the other encodings.
} finally {
closeSync(memfd);
}
setSyntheticAllocationLimitForTesting(2 * 1024 * 1024);

try {
expect(() => (encoding === "buffer" ? readFileSync(memfd) : readFileSync(memfd, encoding))).toThrow(
"Out of memory",
);
} finally {
Bun.gc(true);
closeSync(memfd);
}
});
});
}

0 comments on commit 781a392

Please sign in to comment.