diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9feff2712177b1..e3ff1de2e39252 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -285,7 +285,7 @@ If you see this error when compiling, run: $ xcode-select --install ``` -## Cannot find `libatomic.a` +### Cannot find `libatomic.a` Bun defaults to linking `libatomic` statically, as not all systems have it. If you are building on a distro that does not have a static libatomic available, you can run the following command to enable dynamic linking: @@ -295,7 +295,7 @@ $ bun run build -DUSE_STATIC_LIBATOMIC=OFF The built version of Bun may not work on other systems if compiled this way. -## ccache conflicts with building TinyCC on macOS +### ccache conflicts with building TinyCC on macOS If you run into issues with `ccache` when building TinyCC, try reinstalling ccache @@ -303,3 +303,9 @@ If you run into issues with `ccache` when building TinyCC, try reinstalling ccac brew uninstall ccache brew install ccache ``` + +## Using bun-debug + +- Disable logging: `BUN_DEBUG_QUIET_LOGS=1 bun-debug ...` (to disable all debug logging) +- Enable logging for a specific zig scope: `BUN_DEBUG_EventLoop=1 bun-debug ...` (to allow `std.log.scoped(.EventLoop)`) +- Bun transpiles every file it runs, to see the actual executed source in a debug build find it in `/tmp/bun-debug-src/...path/to/file`, for example the transpiled version of `/home/bun/index.ts` would be in `/tmp/bun-debug-src/home/bun/index.ts` diff --git a/src/StandaloneModuleGraph.zig b/src/StandaloneModuleGraph.zig index a58989280f8b97..e3daa4da1713db 100644 --- a/src/StandaloneModuleGraph.zig +++ b/src/StandaloneModuleGraph.zig @@ -1072,7 +1072,7 @@ pub const StandaloneModuleGraph = struct { if (item.data != .e_string) return error.InvalidSourceMap; - const decoded = try item.data.e_string.stringDecodedUTF8(arena); + const decoded = try item.data.e_string.stringCloned(arena); const offset = string_payload.items.len; try string_payload.appendSlice(decoded); @@ -1089,7 +1089,7 @@ pub const StandaloneModuleGraph = struct { if (item.data != .e_string) return error.InvalidSourceMap; - const utf8 = try item.data.e_string.stringDecodedUTF8(arena); + const utf8 = try item.data.e_string.stringCloned(arena); defer arena.free(utf8); const offset = string_payload.items.len; diff --git a/src/api/schema.zig b/src/api/schema.zig index fa851862807022..002f43223f1e2e 100644 --- a/src/api/schema.zig +++ b/src/api/schema.zig @@ -2816,7 +2816,7 @@ pub const Api = struct { fn expectString(this: *Parser, expr: js_ast.Expr) !void { switch (expr.data) { - .e_string, .e_utf8_string => {}, + .e_string => {}, else => { this.log.addErrorFmt(this.source, expr.loc, this.allocator, "expected string but received {}", .{ @as(js_ast.Expr.Tag, expr.data), diff --git a/src/bun.js/module_loader.zig b/src/bun.js/module_loader.zig index 2f378ec45a7ae7..ded396631b953a 100644 --- a/src/bun.js/module_loader.zig +++ b/src/bun.js/module_loader.zig @@ -1767,7 +1767,7 @@ pub const ModuleLoader = struct { .specifier = input_specifier, .source_url = input_specifier.createIfDifferent(path.text), .hash = 0, - .jsvalue_for_export = parse_result.ast.parts.@"[0]"().stmts[0].data.s_expr.value.toJS(allocator, globalObject orelse jsc_vm.global, .{}) catch @panic("Unexpected JS error"), + .jsvalue_for_export = parse_result.ast.parts.@"[0]"().stmts[0].data.s_expr.value.toJS(allocator, globalObject orelse jsc_vm.global) catch @panic("Unexpected JS error"), .tag = .exports_object, }; } diff --git a/src/bundler.zig b/src/bundler.zig index 3db3a446e03b64..36b674724cb43a 100644 --- a/src/bundler.zig +++ b/src/bundler.zig @@ -1594,7 +1594,7 @@ pub const Bundler = struct { }, // TODO: use lazy export AST .text => { - const expr = js_ast.Expr.init(js_ast.E.UTF8String, js_ast.E.UTF8String{ + const expr = js_ast.Expr.init(js_ast.E.String, js_ast.E.String{ .data = source.contents, }, logger.Loc.Empty); const stmt = js_ast.Stmt.alloc(js_ast.S.ExportDefault, js_ast.S.ExportDefault{ diff --git a/src/bundler/bundle_v2.zig b/src/bundler/bundle_v2.zig index 9e5dfb43787384..aeaf33c8b00417 100644 --- a/src/bundler/bundle_v2.zig +++ b/src/bundler/bundle_v2.zig @@ -3504,7 +3504,7 @@ pub const ParseTask = struct { return JSAst.init((try js_parser.newLazyExportAST(allocator, bundler.options.define, opts, log, root, &source, "")).?); }, .text => { - const root = Expr.init(E.UTF8String, E.UTF8String{ + const root = Expr.init(E.String, E.String{ .data = source.contents, }, Logger.Loc{ .start = 0 }); var ast = JSAst.init((try js_parser.newLazyExportAST(allocator, bundler.options.define, opts, log, root, &source, "")).?); diff --git a/src/bunfig.zig b/src/bunfig.zig index 5be26533cbe632..4f1d73d68c3162 100644 --- a/src/bunfig.zig +++ b/src/bunfig.zig @@ -797,7 +797,7 @@ pub const Bunfig = struct { pub fn expectString(this: *Parser, expr: js_ast.Expr) !void { switch (expr.data) { - .e_string, .e_utf8_string => {}, + .e_string => {}, else => { this.log.addErrorFmtOpts( this.allocator, diff --git a/src/cli/run_command.zig b/src/cli/run_command.zig index d24ef3600a89cd..3267b38de8217c 100644 --- a/src/cli/run_command.zig +++ b/src/cli/run_command.zig @@ -194,7 +194,6 @@ pub const RunCommand = struct { delimiter = 0; }, - // do we need to escape? ' ' => { delimiter = ' '; }, @@ -236,24 +235,6 @@ pub const RunCommand = struct { delimiter = 0; }, - // TODO: handle escape sequences properly - // https://github.com/oven-sh/bun/issues/53 - '\\' => { - delimiter = 0; - - if (entry_i + 1 < script.len) { - switch (script[entry_i + 1]) { - '"', '\'' => { - entry_i += 1; - continue; - }, - '\\' => { - entry_i += 1; - }, - else => {}, - } - } - }, else => { delimiter = 0; }, diff --git a/src/feature_flags.zig b/src/feature_flags.zig index eecfa45a163885..6a7ceb55fce496 100644 --- a/src/feature_flags.zig +++ b/src/feature_flags.zig @@ -13,9 +13,6 @@ pub const jsx_runtime_is_cjs = true; pub const tracing = true; -/// Disabled due to bugs -pub const minify_javascript_string_length = false; - // TODO: remove this flag, it should use bun.Output.scoped pub const verbose_watcher = false; diff --git a/src/ini.zig b/src/ini.zig index 4db24c0b19e9a3..a767cbeee09576 100644 --- a/src/ini.zig +++ b/src/ini.zig @@ -286,7 +286,7 @@ pub const Parser = struct { const c = val[i]; if (esc) { switch (c) { - '\\' => try unesc.appendSlice(&[_]u8{ '\\', '\\' }), + '\\' => try unesc.appendSlice(&[_]u8{'\\'}), ';', '#', '$' => try unesc.append(c), '.' => { if (comptime usage == .section) { @@ -636,7 +636,7 @@ pub const IniTestingAPIs = struct { } }; - return parser.out.toJS(bun.default_allocator, globalThis, .{ .decode_escape_sequences = true }) catch |e| { + return parser.out.toJS(bun.default_allocator, globalThis) catch |e| { globalThis.throwError(e, "failed to turn AST into JS"); return .undefined; }; @@ -660,7 +660,6 @@ pub const ToStringFormatter = struct { .e_number => try writer.print("{d}", .{this.d.e_number.value}), .e_string => try writer.print("{s}", .{this.d.e_string.data}), .e_null => try writer.print("null", .{}), - .e_utf8_string => try writer.print("{s}", .{this.d.e_utf8_string.data}), else => |tag| if (bun.Environment.isDebug) { Output.panic("Unexpected AST node: {s}", .{@tagName(tag)}); diff --git a/src/install/install.zig b/src/install/install.zig index 3fbb8f010f44b3..606f2a0389c16e 100644 --- a/src/install/install.zig +++ b/src/install/install.zig @@ -2780,7 +2780,6 @@ pub const PackageManager = struct { pub const GetJSONOptions = struct { init_reset_store: bool = true, - always_decode_escape_sequences: bool = true, guess_indentation: bool = false, }; @@ -2840,7 +2839,6 @@ pub const PackageManager = struct { .is_json = true, .allow_comments = true, .allow_trailing_commas = true, - .always_decode_escape_sequences = opts.always_decode_escape_sequences, .guess_indentation = opts.guess_indentation, }, ) catch |err| { @@ -2894,7 +2892,6 @@ pub const PackageManager = struct { .is_json = true, .allow_comments = true, .allow_trailing_commas = true, - .always_decode_escape_sequences = opts.always_decode_escape_sequences, .guess_indentation = opts.guess_indentation, }, ); @@ -10412,7 +10409,6 @@ pub const PackageManager = struct { manager.log, manager.original_package_json_path, .{ - .always_decode_escape_sequences = false, .guess_indentation = true, }, )) { diff --git a/src/js_ast.zig b/src/js_ast.zig index 74ddcc0ddbb57f..e1840a32ddf980 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -1443,9 +1443,6 @@ pub const OptionalChain = enum(u1) { }; pub const E = struct { - pub const ToJsOpts = struct { - decode_escape_sequences: bool = true, - }; pub const Array = struct { items: ExprNodeList = ExprNodeList{}, comma_after_spread: ?logger.Loc = null, @@ -1503,13 +1500,13 @@ pub const E = struct { return ExprNodeList.init(out[0 .. out.len - remain.len]); } - pub fn toJS(this: @This(), allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject, comptime opts: ToJsOpts) ToJSError!JSC.JSValue { + pub fn toJS(this: @This(), allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject) ToJSError!JSC.JSValue { const items = this.items.slice(); var array = JSC.JSValue.createEmptyArray(globalObject, items.len); array.protect(); defer array.unprotect(); for (items, 0..) |expr, j| { - array.putIndex(globalObject, @as(u32, @truncate(j)), try expr.data.toJS(allocator, globalObject, opts)); + array.putIndex(globalObject, @as(u32, @truncate(j)), try expr.data.toJS(allocator, globalObject)); } return array; @@ -1532,11 +1529,6 @@ pub const E = struct { }; }; - /// A string which will be printed as JSON by the JSPrinter. - pub const UTF8String = struct { - data: []const u8, - }; - pub const Unary = struct { op: Op.Code, value: ExprNodeIndex, @@ -1951,7 +1943,7 @@ pub const E = struct { return if (asProperty(self, key)) |query| query.expr else @as(?Expr, null); } - pub fn toJS(this: *Object, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject, comptime opts: ToJsOpts) ToJSError!JSC.JSValue { + pub fn toJS(this: *Object, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject) ToJSError!JSC.JSValue { var obj = JSC.JSValue.createEmptyObject(globalObject, this.properties.len); obj.protect(); defer obj.unprotect(); @@ -1961,7 +1953,7 @@ pub const E = struct { return error.@"Cannot convert argument type to JS"; } var key = prop.key.?.data.e_string.toZigString(allocator); - obj.put(globalObject, &key, try prop.value.?.toJS(allocator, globalObject, opts)); + obj.put(globalObject, &key, try prop.value.?.toJS(allocator, globalObject)); } return obj; @@ -2404,22 +2396,20 @@ pub const E = struct { return str.string(allocator); } - pub fn javascriptLength(s: *const String) u32 { + pub fn javascriptLength(s: *const String) ?u32 { if (s.rope_len > 0) { // We only support ascii ropes for now return s.rope_len; } if (s.isUTF8()) { - if (comptime !Environment.isNative) { - const allocated = (strings.toUTF16Alloc(bun.default_allocator, s.data, false, false) catch return 0) orelse return s.data.len; - defer bun.default_allocator.free(allocated); - return @as(u32, @truncate(allocated.len)); + if (!strings.isAllASCII(s.data)) { + return null; } - return @as(u32, @truncate(bun.simdutf.length.utf16.from.utf8(s.data))); + return @truncate(s.data.len); } - return @as(u32, @truncate(s.slice16().len)); + return @truncate(s.slice16().len); } pub inline fn len(s: *const String) usize { @@ -2521,12 +2511,6 @@ pub const E = struct { } } - pub fn stringDecodedUTF8(s: *const String, allocator: std.mem.Allocator) !bun.string { - const utf16_decode = try bun.js_lexer.decodeStringLiteralEscapeSequencesToUTF16(try s.string(allocator), allocator); - defer allocator.free(utf16_decode); - return try bun.strings.toUTF8Alloc(allocator, utf16_decode); - } - pub fn hash(s: *const String) u64 { if (s.isBlank()) return 0; @@ -2539,33 +2523,31 @@ pub const E = struct { } } - pub fn toJS(s: *String, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject, comptime opts: ToJsOpts) JSC.JSValue { + pub fn toJS(s: *String, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject) !JSC.JSValue { + s.resolveRopeIfNeeded(allocator); if (!s.isPresent()) { var emp = bun.String.empty; return emp.toJS(globalObject); } - if (s.is_utf16) { - var out, const chars = bun.String.createUninitialized(.utf16, s.len()); + if (s.isUTF8()) { + if (try strings.toUTF16Alloc(allocator, s.slice8(), false, false)) |utf16| { + var out, const chars = bun.String.createUninitialized(.utf16, utf16.len); + defer out.deref(); + @memcpy(chars, utf16); + return out.toJS(globalObject); + } else { + var out, const chars = bun.String.createUninitialized(.latin1, s.slice8().len); + defer out.deref(); + @memcpy(chars, s.slice8()); + return out.toJS(globalObject); + } + } else { + var out, const chars = bun.String.createUninitialized(.utf16, s.slice16().len); defer out.deref(); @memcpy(chars, s.slice16()); return out.toJS(globalObject); } - - if (comptime opts.decode_escape_sequences) { - s.resolveRopeIfNeeded(allocator); - - const decoded = js_lexer.decodeStringLiteralEscapeSequencesToUTF16(s.slice(allocator), allocator) catch unreachable; - defer allocator.free(decoded); - - var out, const chars = bun.String.createUninitialized(.utf16, decoded.len); - defer out.deref(); - @memcpy(chars, decoded); - - return out.toJS(globalObject); - } else { - return JSC.ZigString.fromUTF8(s.data).toValueGC(globalObject); - } } pub fn toZigString(s: *String, allocator: std.mem.Allocator) JSC.ZigString { @@ -3420,8 +3402,8 @@ pub const Expr = struct { return false; } - pub fn toJS(this: Expr, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject, comptime opts: E.ToJsOpts) ToJSError!JSC.JSValue { - return this.data.toJS(allocator, globalObject, opts); + pub fn toJS(this: Expr, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject) ToJSError!JSC.JSValue { + return this.data.toJS(allocator, globalObject); } pub inline fn isArray(this: *const Expr) bool { @@ -3613,7 +3595,7 @@ pub const Expr = struct { pub inline fn isString(expr: *const Expr) bool { return switch (expr.data) { - .e_string, .e_utf8_string => true, + .e_string => true, else => false, }; } @@ -3621,7 +3603,6 @@ pub const Expr = struct { pub inline fn asString(expr: *const Expr, allocator: std.mem.Allocator) ?string { switch (expr.data) { .e_string => |str| return str.string(allocator) catch bun.outOfMemory(), - .e_utf8_string => |str| return str.data, else => return null, } } @@ -3633,7 +3614,6 @@ pub const Expr = struct { defer allocator.free(utf8_str); return hash_fn(utf8_str); }, - .e_utf8_string => |str| return hash_fn(str.data), else => return null, } } @@ -3641,7 +3621,6 @@ pub const Expr = struct { pub inline fn asStringCloned(expr: *const Expr, allocator: std.mem.Allocator) OOM!?string { switch (expr.data) { .e_string => |str| return try str.stringCloned(allocator), - .e_utf8_string => |str| return try allocator.dupe(u8, str.data), else => return null, } } @@ -3649,7 +3628,6 @@ pub const Expr = struct { pub inline fn asStringZ(expr: *const Expr, allocator: std.mem.Allocator) OOM!?stringZ { switch (expr.data) { .e_string => |str| return try str.stringZ(allocator), - .e_utf8_string => |str| return try allocator.dupeZ(u8, str.data), else => return null, } } @@ -3831,18 +3809,6 @@ pub const Expr = struct { }, }; }, - E.UTF8String => { - return Expr{ - .loc = loc, - .data = Data{ - .e_utf8_string = brk: { - const item = allocator.create(Type) catch unreachable; - item.* = st; - break :brk item; - }, - }, - }; - }, E.Class => { return Expr{ .loc = loc, @@ -4253,14 +4219,6 @@ pub const Expr = struct { Data.Store.assert(); switch (Type) { - E.UTF8String => { - return Expr{ - .loc = loc, - .data = Data{ - .e_utf8_string = Data.Store.append(Type, st), - }, - }; - }, E.Array => { return Expr{ .loc = loc, @@ -4644,9 +4602,6 @@ pub const Expr = struct { e_require_main, e_inlined_enum, - /// A string that is UTF-8 encoded without escaping for use in JavaScript. - e_utf8_string, - // object, regex and array may have had side effects pub fn isPrimitiveLiteral(tag: Tag) bool { return switch (tag) { @@ -5340,7 +5295,6 @@ pub const Expr = struct { e_require_main, e_inlined_enum: *E.InlinedEnum, - e_utf8_string: *E.UTF8String, comptime { bun.assert_eql(@sizeOf(Data), 24); // Do not increase the size of Expr @@ -5800,9 +5754,6 @@ pub const Expr = struct { // pretend there is no comment e.value.data.writeToHasher(hasher, symbol_table); }, - .e_utf8_string => |e| { - hasher.update(e.data); - }, // no data .e_require_call_target, @@ -5862,7 +5813,6 @@ pub const Expr = struct { .e_string, .e_inlined_enum, .e_import_meta, - .e_utf8_string, => true, .e_template => |template| template.tag == null and template.parts.len == 0, @@ -6264,12 +6214,11 @@ pub const Expr = struct { return Equality.unknown; } - pub fn toJS(this: Data, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject, comptime opts: E.ToJsOpts) ToJSError!JSC.JSValue { + pub fn toJS(this: Data, allocator: std.mem.Allocator, globalObject: *JSC.JSGlobalObject) ToJSError!JSC.JSValue { return switch (this) { - .e_array => |e| e.toJS(allocator, globalObject, opts), - .e_object => |e| e.toJS(allocator, globalObject, opts), - .e_string => |e| e.toJS(allocator, globalObject, opts), - .e_utf8_string => |e| JSC.ZigString.fromUTF8(e.data).toJS(globalObject), + .e_array => |e| e.toJS(allocator, globalObject), + .e_object => |e| e.toJS(allocator, globalObject), + .e_string => |e| e.toJS(allocator, globalObject), .e_null => JSC.JSValue.null, .e_undefined => JSC.JSValue.undefined, .e_boolean => |boolean| if (boolean.value) @@ -6279,7 +6228,7 @@ pub const Expr = struct { .e_number => |e| e.toJS(), // .e_big_int => |e| e.toJS(ctx, exception), - .e_inlined_enum => |inlined| inlined.value.data.toJS(allocator, globalObject, .{}), + .e_inlined_enum => |inlined| inlined.value.data.toJS(allocator, globalObject), .e_identifier, .e_import_identifier, @@ -6325,7 +6274,6 @@ pub const Expr = struct { E.Template, E.TemplatePart, E.Unary, - E.UTF8String, E.Yield, }, 512); @@ -8521,7 +8469,6 @@ pub const Macro = struct { const value = in.toJS( allocator, globalObject, - .{}, ) catch |e| { // Keeping a separate variable instead of modifying js_args.len // due to allocator.free call in defer diff --git a/src/js_lexer.zig b/src/js_lexer.zig index aec6e3c8e6f384..0a4eb1a70348c6 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -74,24 +74,9 @@ pub const JSONOptions = struct { /// mark as originally for a macro to enable inlining was_originally_macro: bool = false, - always_decode_escape_sequences: bool = false, - guess_indentation: bool = false, }; -pub fn decodeStringLiteralEscapeSequencesToUTF16(bytes: string, allocator: std.mem.Allocator) ![]const u16 { - var log = logger.Log.init(allocator); - defer log.deinit(); - const source = logger.Source.initEmptyFile(""); - var lexer = try NewLexer(.{}).init(&log, source, allocator); - defer lexer.deinit(); - - var buf = std.ArrayList(u16).init(allocator); - try lexer.decodeEscapeSequences(0, bytes, @TypeOf(buf), &buf); - - return buf.items; -} - pub fn NewLexer( comptime json_options: JSONOptions, ) type { @@ -103,7 +88,6 @@ pub fn NewLexer( json_options.ignore_trailing_escape_sequences, json_options.json_warn_duplicate_keys, json_options.was_originally_macro, - json_options.always_decode_escape_sequences, json_options.guess_indentation, ); } @@ -116,7 +100,6 @@ fn NewLexer_( comptime json_options_ignore_trailing_escape_sequences: bool, comptime json_options_json_warn_duplicate_keys: bool, comptime json_options_was_originally_macro: bool, - comptime json_options_always_decode_escape_sequences: bool, comptime json_options_guess_indentation: bool, ) type { const json_options = JSONOptions{ @@ -127,7 +110,6 @@ fn NewLexer_( .ignore_trailing_escape_sequences = json_options_ignore_trailing_escape_sequences, .json_warn_duplicate_keys = json_options_json_warn_duplicate_keys, .was_originally_macro = json_options_was_originally_macro, - .always_decode_escape_sequences = json_options_always_decode_escape_sequences, .guess_indentation = json_options_guess_indentation, }; return struct { @@ -187,12 +169,10 @@ fn NewLexer_( fn_or_arrow_start_loc: logger.Loc = logger.Loc.Empty, regex_flags_start: ?u16 = null, allocator: std.mem.Allocator, - /// In JavaScript, strings are stored as UTF-16, but nearly every string is ascii. - /// This means, usually, we can skip UTF8 -> UTF16 conversions. - string_literal_buffer: std.ArrayList(u16), - string_literal_slice: string = "", - string_literal: JavascriptString, - string_literal_is_ascii: bool = false, + string_literal_raw_content: string = "", + string_literal_start: usize = 0, + string_literal_raw_format: enum { ascii, utf16, needs_decode } = .ascii, + temp_buffer_u16: std.ArrayList(u16), /// Only used for JSON stringification when bundling /// This is a zero-bit type unless we're parsing JSON. @@ -210,45 +190,6 @@ fn NewLexer_( .{} else {}, - pub fn clone(self: *const LexerType) LexerType { - return LexerType{ - .log = self.log, - .source = self.source, - .current = self.current, - .start = self.start, - .end = self.end, - .did_panic = self.did_panic, - .approximate_newline_count = self.approximate_newline_count, - .previous_backslash_quote_in_jsx = self.previous_backslash_quote_in_jsx, - .token = self.token, - .has_newline_before = self.has_newline_before, - .has_pure_comment_before = self.has_pure_comment_before, - .has_no_side_effect_comment_before = self.has_no_side_effect_comment_before, - .preserve_all_comments_before = self.preserve_all_comments_before, - .is_legacy_octal_literal = self.is_legacy_octal_literal, - .is_log_disabled = self.is_log_disabled, - .comments_to_preserve_before = self.comments_to_preserve_before, - .code_point = self.code_point, - .identifier = self.identifier, - .regex_flags_start = self.regex_flags_start, - .jsx_pragma = self.jsx_pragma, - .source_mapping_url = self.source_mapping_url, - .number = self.number, - .rescan_close_brace_as_template_token = self.rescan_close_brace_as_template_token, - .prev_error_loc = self.prev_error_loc, - .allocator = self.allocator, - .string_literal_buffer = self.string_literal_buffer, - .string_literal_slice = self.string_literal_slice, - .string_literal = self.string_literal, - .string_literal_is_ascii = self.string_literal_is_ascii, - .is_ascii_only = self.is_ascii_only, - .all_comments = self.all_comments, - .prev_token_was_await_keyword = self.prev_token_was_await_keyword, - .await_keyword_loc = self.await_keyword_loc, - .fn_or_arrow_start_loc = self.fn_or_arrow_start_loc, - }; - } - pub inline fn loc(self: *const LexerType) logger.Loc { return logger.usize2Loc(self.start); } @@ -353,6 +294,7 @@ fn NewLexer_( } pub fn deinit(this: *LexerType) void { + this.temp_buffer_u16.clearAndFree(); this.all_comments.clearAndFree(); this.comments_to_preserve_before.clearAndFree(); } @@ -693,20 +635,15 @@ fn NewLexer_( } } - pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_slow_path: bool }; + pub const InnerStringLiteral = packed struct { suffix_len: u3, needs_decode: bool }; - fn parseStringLiteralInnter(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral { - const check_for_backslash = comptime is_json and json_options.always_decode_escape_sequences; - var needs_slow_path = false; + fn parseStringLiteralInner(lexer: *LexerType, comptime quote: CodePoint) !InnerStringLiteral { var suffix_len: u3 = if (comptime quote == 0) 0 else 1; - var has_backslash: if (check_for_backslash) bool else void = if (check_for_backslash) false else {}; + var needs_decode = false; stringLiteral: while (true) { switch (lexer.code_point) { '\\' => { - if (comptime check_for_backslash) { - has_backslash = true; - } - + needs_decode = true; lexer.step(); // Handle Windows CRLF @@ -728,14 +665,12 @@ fn NewLexer_( switch (lexer.code_point) { // 0 cannot be in this list because it may be a legacy octal literal - 'v', 'f', 't', 'r', 'n', '`', '\'', '"', '\\', 0x2028, 0x2029 => { + '`', '\'', '"', '\\' => { lexer.step(); continue :stringLiteral; }, - else => { - needs_slow_path = true; - }, + else => {}, } }, // This indicates the end of the file @@ -754,7 +689,7 @@ fn NewLexer_( } // Template literals require newline normalization - needs_slow_path = true; + needs_decode = true; }, '\n' => { @@ -799,7 +734,7 @@ fn NewLexer_( // Non-ASCII strings need the slow path if (lexer.code_point >= 0x80) { - needs_slow_path = true; + needs_decode = true; } else if (is_json and lexer.code_point < 0x20) { try lexer.syntaxError(); } else if (comptime (quote == '"' or quote == '\'') and Environment.isNative) { @@ -820,9 +755,7 @@ fn NewLexer_( lexer.step(); } - if (comptime check_for_backslash) needs_slow_path = needs_slow_path or has_backslash; - - return InnerStringLiteral{ .needs_slow_path = needs_slow_path, .suffix_len = suffix_len }; + return InnerStringLiteral{ .needs_decode = needs_decode, .suffix_len = suffix_len }; } pub fn parseStringLiteral(lexer: *LexerType, comptime quote: CodePoint) !void { @@ -837,35 +770,20 @@ fn NewLexer_( // .env values may not always be quoted. lexer.step(); - const string_literal_details = try lexer.parseStringLiteralInnter(quote); + const string_literal_details = try lexer.parseStringLiteralInner(quote); // Reset string literal const base = if (comptime quote == 0) lexer.start else lexer.start + 1; - lexer.string_literal_slice = lexer.source.contents[base..@min(lexer.source.contents.len, lexer.end - @as(usize, string_literal_details.suffix_len))]; - lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path; - lexer.string_literal_buffer.shrinkRetainingCapacity(0); - if (string_literal_details.needs_slow_path) { - lexer.string_literal_buffer.ensureUnusedCapacity(lexer.string_literal_slice.len) catch unreachable; - try lexer.decodeEscapeSequences(lexer.start, lexer.string_literal_slice, @TypeOf(lexer.string_literal_buffer), &lexer.string_literal_buffer); - lexer.string_literal = lexer.string_literal_buffer.items; - } - if (comptime is_json) lexer.is_ascii_only = lexer.is_ascii_only and lexer.string_literal_is_ascii; + lexer.string_literal_raw_content = lexer.source.contents[base..@min(lexer.source.contents.len, lexer.end - @as(usize, string_literal_details.suffix_len))]; + lexer.string_literal_raw_format = if (string_literal_details.needs_decode) .needs_decode else .ascii; + lexer.string_literal_start = lexer.start; + if (comptime is_json) lexer.is_ascii_only = lexer.is_ascii_only and !string_literal_details.needs_decode; if (comptime !FeatureFlags.allow_json_single_quotes) { if (quote == '\'' and is_json) { try lexer.addRangeError(lexer.range(), "JSON strings must use double quotes", .{}, true); } } - - // for (text) - // // if (needs_slow_path) { - // // // Slow path - - // // // lexer.string_literal = lexer.(lexer.start + 1, text); - // // } else { - // // // Fast path - - // // } } inline fn nextCodepointSlice(it: *LexerType) []const u8 { @@ -928,7 +846,6 @@ fn NewLexer_( pub const IdentifierKind = enum { normal, private }; pub const ScanResult = struct { token: T, contents: string }; - threadlocal var small_escape_sequence_buffer: [4096]u16 = undefined; const FakeArrayList16 = struct { items: []u16, i: usize = 0, @@ -948,8 +865,6 @@ fn NewLexer_( bun.assert(fake.items.len > fake.i + int); } }; - threadlocal var large_escape_sequence_list: std.ArrayList(u16) = undefined; - threadlocal var large_escape_sequence_list_loaded: bool = false; // This is an edge case that doesn't really exist in the wild, so it doesn't // need to be as fast as possible. @@ -1019,20 +934,12 @@ fn NewLexer_( // Second pass: re-use our existing escape sequence parser const original_text = lexer.raw(); - if (original_text.len < 1024) { - var buf = FakeArrayList16{ .items = &small_escape_sequence_buffer, .i = 0 }; - try lexer.decodeEscapeSequences(lexer.start, original_text, FakeArrayList16, &buf); - result.contents = lexer.utf16ToString(buf.items[0..buf.i]); - } else { - if (!large_escape_sequence_list_loaded) { - large_escape_sequence_list = try std.ArrayList(u16).initCapacity(lexer.allocator, original_text.len); - large_escape_sequence_list_loaded = true; - } - large_escape_sequence_list.shrinkRetainingCapacity(0); - try lexer.decodeEscapeSequences(lexer.start, original_text, std.ArrayList(u16), &large_escape_sequence_list); - result.contents = lexer.utf16ToString(large_escape_sequence_list.items); - } + bun.assert(lexer.temp_buffer_u16.items.len == 0); + defer lexer.temp_buffer_u16.clearRetainingCapacity(); + try lexer.temp_buffer_u16.ensureUnusedCapacity(original_text.len); + try lexer.decodeEscapeSequences(lexer.start, original_text, std.ArrayList(u16), &lexer.temp_buffer_u16); + result.contents = try lexer.utf16ToString(lexer.temp_buffer_u16.items); const identifier = if (kind != .private) result.contents @@ -1064,7 +971,6 @@ fn NewLexer_( // result.token = if (Keywords.has(result.contents)) .t_escaped_keyword else .t_identifier; - // const text = lexer.decodeEscapeSequences(lexer.start, lexer.raw(), ) return result; } @@ -2133,14 +2039,11 @@ fn NewLexer_( } pub fn initTSConfig(log: *logger.Log, source: logger.Source, allocator: std.mem.Allocator) !LexerType { - const empty_string_literal: JavascriptString = &emptyJavaScriptString; var lex = LexerType{ .log = log, .source = source, - .string_literal = empty_string_literal, - .string_literal_buffer = std.ArrayList(u16).init(allocator), + .temp_buffer_u16 = std.ArrayList(u16).init(allocator), .prev_error_loc = logger.Loc.Empty, - .string_literal_is_ascii = true, .allocator = allocator, .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), .all_comments = std.ArrayList(logger.Range).init(allocator), @@ -2152,12 +2055,10 @@ fn NewLexer_( } pub fn initJSON(log: *logger.Log, source: logger.Source, allocator: std.mem.Allocator) !LexerType { - const empty_string_literal: JavascriptString = &emptyJavaScriptString; var lex = LexerType{ .log = log, - .string_literal_buffer = std.ArrayList(u16).init(allocator), .source = source, - .string_literal = empty_string_literal, + .temp_buffer_u16 = std.ArrayList(u16).init(allocator), .prev_error_loc = logger.Loc.Empty, .allocator = allocator, .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), @@ -2170,12 +2071,10 @@ fn NewLexer_( } pub fn initWithoutReading(log: *logger.Log, source: logger.Source, allocator: std.mem.Allocator) LexerType { - const empty_string_literal: JavascriptString = &emptyJavaScriptString; return LexerType{ .log = log, .source = source, - .string_literal = empty_string_literal, - .string_literal_buffer = std.ArrayList(u16).init(allocator), + .temp_buffer_u16 = std.ArrayList(u16).init(allocator), .prev_error_loc = logger.Loc.Empty, .allocator = allocator, .comments_to_preserve_before = std.ArrayList(js_ast.G.Comment).init(allocator), @@ -2191,22 +2090,40 @@ fn NewLexer_( return lex; } - pub fn toEString(lexer: *LexerType) js_ast.E.String { - if (lexer.string_literal_is_ascii) { - return js_ast.E.String.init(lexer.string_literal_slice); - } else { - return js_ast.E.String.init(lexer.allocator.dupe(u16, lexer.string_literal) catch unreachable); + pub fn toEString(lexer: *LexerType) !js_ast.E.String { + switch (lexer.string_literal_raw_format) { + .ascii => { + // string_literal_raw_content contains ascii without escapes + return js_ast.E.String.init(lexer.string_literal_raw_content); + }, + .utf16 => { + // string_literal_raw_content is already parsed, duplicated, and utf-16 + return js_ast.E.String.init(@as([]const u16, @alignCast(std.mem.bytesAsSlice(u16, lexer.string_literal_raw_content)))); + }, + .needs_decode => { + // string_literal_raw_content contains escapes (ie '\n') that need to be converted to their values (ie 0x0A). + // escape parsing may cause a syntax error. + bun.assert(lexer.temp_buffer_u16.items.len == 0); + defer lexer.temp_buffer_u16.clearRetainingCapacity(); + try lexer.temp_buffer_u16.ensureUnusedCapacity(lexer.string_literal_raw_content.len); + try lexer.decodeEscapeSequences(lexer.string_literal_start, lexer.string_literal_raw_content, std.ArrayList(u16), &lexer.temp_buffer_u16); + const first_non_ascii = strings.firstNonASCII16([]const u16, lexer.temp_buffer_u16.items); + // prefer to store an ascii e.string rather than a utf-16 one. ascii takes less memory, and `+` folding is not yet supported on utf-16. + if (first_non_ascii != null) { + return js_ast.E.String.init(try lexer.allocator.dupe(u16, lexer.temp_buffer_u16.items)); + } else { + const result = try lexer.allocator.alloc(u8, lexer.temp_buffer_u16.items.len); + strings.copyU16IntoU8(result, []const u16, lexer.temp_buffer_u16.items); + return js_ast.E.String.init(result); + } + }, } } - pub fn toUTF8EString(lexer: *LexerType) js_ast.E.String { - if (lexer.string_literal_is_ascii) { - return js_ast.E.String.init(lexer.string_literal_slice); - } else { - var e_str = js_ast.E.String.init(lexer.string_literal); - e_str.toUTF8(lexer.allocator) catch unreachable; - return e_str; - } + pub fn toUTF8EString(lexer: *LexerType) !js_ast.E.String { + var res = try lexer.toEString(); + try res.toUTF8(lexer.allocator); + return res; } inline fn assertNotJSON(_: *const LexerType) void { @@ -2276,32 +2193,9 @@ fn NewLexer_( } } - // TODO: use wtf-8 encoding. - pub fn utf16ToStringWithValidation(lexer: *LexerType, js: JavascriptString) !string { - // return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js); - return utf16ToString(lexer, js); + pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) !string { + return try strings.toUTF8AllocWithType(lexer.allocator, []const u16, js); } - - pub fn utf16ToString(lexer: *LexerType, js: JavascriptString) string { - var temp: [4]u8 = undefined; - var list = std.ArrayList(u8).initCapacity(lexer.allocator, js.len) catch unreachable; - var i: usize = 0; - while (i < js.len) : (i += 1) { - var r1 = @as(i32, @intCast(js[i])); - if (r1 >= 0xD800 and r1 <= 0xDBFF and i + 1 < js.len) { - const r2 = @as(i32, @intCast(js[i] + 1)); - if (r2 >= 0xDC00 and r2 <= 0xDFFF) { - r1 = (r1 - 0xD800) << 10 | (r2 - 0xDC00) + 0x10000; - i += 1; - } - } - const width = strings.encodeWTF8Rune(&temp, r1); - list.appendSlice(temp[0..width]) catch unreachable; - } - return list.items; - // return std.unicode.utf16leToUtf8Alloc(lexer.allocator, js) catch unreachable; - } - pub fn nextInsideJSXElement(lexer: *LexerType) !void { lexer.assertNotJSON(); @@ -2508,13 +2402,19 @@ fn NewLexer_( } lexer.token = .t_string_literal; - lexer.string_literal_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - 1]; - lexer.string_literal_is_ascii = !needs_decode; - lexer.string_literal_buffer.clearRetainingCapacity(); + + const raw_content_slice = lexer.source.contents[lexer.start + 1 .. lexer.end - 1]; if (needs_decode) { - lexer.string_literal_buffer.ensureTotalCapacity(lexer.string_literal_slice.len) catch unreachable; - try lexer.decodeJSXEntities(lexer.string_literal_slice, &lexer.string_literal_buffer); - lexer.string_literal = lexer.string_literal_buffer.items; + bun.assert(lexer.temp_buffer_u16.items.len == 0); + defer lexer.temp_buffer_u16.clearRetainingCapacity(); + try lexer.temp_buffer_u16.ensureUnusedCapacity(raw_content_slice.len); + try lexer.fixWhitespaceAndDecodeJSXEntities(raw_content_slice, &lexer.temp_buffer_u16); + + lexer.string_literal_raw_content = std.mem.sliceAsBytes(try lexer.allocator.dupe(u16, lexer.temp_buffer_u16.items)); + lexer.string_literal_raw_format = .utf16; + } else { + lexer.string_literal_raw_content = raw_content_slice; + lexer.string_literal_raw_format = .ascii; } } @@ -2574,18 +2474,23 @@ fn NewLexer_( } lexer.token = .t_string_literal; - lexer.string_literal_slice = lexer.source.contents[original_start..lexer.end]; - lexer.string_literal_is_ascii = !needs_fixing; - if (needs_fixing) { - // slow path - lexer.string_literal = try fixWhitespaceAndDecodeJSXEntities(lexer, lexer.string_literal_slice); + const raw_content_slice = lexer.source.contents[original_start..lexer.end]; - if (lexer.string_literal.len == 0) { + if (needs_fixing) { + bun.assert(lexer.temp_buffer_u16.items.len == 0); + defer lexer.temp_buffer_u16.clearRetainingCapacity(); + try lexer.temp_buffer_u16.ensureUnusedCapacity(raw_content_slice.len); + try lexer.fixWhitespaceAndDecodeJSXEntities(raw_content_slice, &lexer.temp_buffer_u16); + lexer.string_literal_raw_content = std.mem.sliceAsBytes(try lexer.allocator.dupe(u16, lexer.temp_buffer_u16.items)); + lexer.string_literal_raw_format = .utf16; + + if (lexer.temp_buffer_u16.items.len == 0) { lexer.has_newline_before = true; continue; } } else { - lexer.string_literal = &([_]u16{}); + lexer.string_literal_raw_content = raw_content_slice; + lexer.string_literal_raw_format = .ascii; } }, } @@ -2594,21 +2499,9 @@ fn NewLexer_( } } - threadlocal var jsx_decode_buf: std.ArrayList(u16) = undefined; - threadlocal var jsx_decode_init = false; - pub fn fixWhitespaceAndDecodeJSXEntities(lexer: *LexerType, text: string) !JavascriptString { + pub fn fixWhitespaceAndDecodeJSXEntities(lexer: *LexerType, text: string, decoded: *std.ArrayList(u16)) !void { lexer.assertNotJSON(); - if (!jsx_decode_init) { - jsx_decode_init = true; - jsx_decode_buf = std.ArrayList(u16).init(default_allocator); - } - jsx_decode_buf.clearRetainingCapacity(); - - var decoded = jsx_decode_buf; - defer jsx_decode_buf = decoded; - const decoded_ptr = &decoded; - var after_last_non_whitespace: ?u32 = null; // Trim whitespace off the end of the first line @@ -2627,7 +2520,7 @@ fn NewLexer_( } // Trim whitespace off the start and end of lines in the middle - try lexer.decodeJSXEntities(text[first_non_whitespace.?..after_last_non_whitespace.?], &decoded); + try lexer.decodeJSXEntities(text[first_non_whitespace.?..after_last_non_whitespace.?], decoded); } // Reset for the next line @@ -2651,10 +2544,8 @@ fn NewLexer_( try decoded.append(' '); } - try decodeJSXEntities(lexer, text[start..text.len], decoded_ptr); + try decodeJSXEntities(lexer, text[start..text.len], decoded); } - - return decoded.items; } fn maybeDecodeJSXEntity(lexer: *LexerType, text: string, cursor: *strings.CodepointIterator.Cursor) void { diff --git a/src/js_parser.zig b/src/js_parser.zig index da72afcb552598..ad64bd2e1119bc 100644 --- a/src/js_parser.zig +++ b/src/js_parser.zig @@ -3516,7 +3516,7 @@ pub const Parser = struct { decls[0] = .{ .binding = p.b(B.Identifier{ .ref = p.dirname_ref }, logger.Loc.Empty), .value = p.newExpr( - E.UTF8String{ + E.String{ .data = p.source.path.name.dir, }, logger.Loc.Empty, @@ -3528,7 +3528,7 @@ pub const Parser = struct { decls[@as(usize, @intFromBool(uses_dirname))] = .{ .binding = p.b(B.Identifier{ .ref = p.filename_ref }, logger.Loc.Empty), .value = p.newExpr( - E.UTF8String{ + E.String{ .data = p.source.path.text, }, logger.Loc.Empty, @@ -11068,7 +11068,7 @@ fn NewParser_( if (strings.eqlComptime(name, "require") and p.lexer.token == .t_open_paren) { // "import ns = require('x')" try p.lexer.next(); - const path = p.newExpr(p.lexer.toEString(), p.lexer.loc()); + const path = p.newExpr(try p.lexer.toEString(), p.lexer.loc()); try p.lexer.expect(.t_string_literal); try p.lexer.expect(.t_close_paren); if (!opts.is_typescript_declare) { @@ -11106,16 +11106,16 @@ fn NewParser_( fn parseClauseAlias(p: *P, kind: string) !string { const loc = p.lexer.loc(); - // The alias may now be a string (see https://github.com/tc39/ecma262/pull/2154) + // The alias may now be a utf-16 (not wtf-16) string (see https://github.com/tc39/ecma262/pull/2154) if (p.lexer.token == .t_string_literal) { - if (p.lexer.string_literal_is_ascii) { - return p.lexer.string_literal_slice; - } else if (p.lexer.utf16ToStringWithValidation(p.lexer.string_literal)) |alias| { - return alias; - } else |_| { + var estr = try p.lexer.toEString(); + if (estr.isUTF8()) { + return estr.slice8(); + } else if (strings.toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(p.lexer.allocator, []const u16, estr.slice16())) |alias_utf8| { + return alias_utf8; + } else |err| { const r = p.source.rangeOfString(loc); - // TODO: improve error message - try p.log.addRangeErrorFmt(p.source, r, p.allocator, "Invalid {s} alias because it contains an unpaired Unicode surrogate (like emoji)", .{kind}); + try p.log.addRangeErrorFmt(p.source, r, p.allocator, "Invalid {s} alias because it contains an unpaired Unicode surrogate ({s})", .{ kind, @errorName(err) }); return p.source.textForRange(r); } } @@ -11789,7 +11789,7 @@ fn NewParser_( // Parse the name if (p.lexer.token == .t_string_literal) { - value.name = p.lexer.toUTF8EString().data; + value.name = (try p.lexer.toUTF8EString()).slice8(); needs_symbol = js_lexer.isIdentifier(value.name); } else if (p.lexer.isIdentifierOrKeyword()) { value.name = p.lexer.identifier; @@ -12138,9 +12138,10 @@ fn NewParser_( } pub fn parsePath(p: *P) !ParsedPath { + const path_text = try p.lexer.toUTF8EString(); var path = ParsedPath{ .loc = p.lexer.loc(), - .text = p.lexer.string_literal_slice, + .text = path_text.slice8(), .is_macro = false, .import_tag = .none, }; @@ -12180,11 +12181,10 @@ fn NewParser_( } } } else if (p.lexer.token == .t_string_literal) { - if (p.lexer.string_literal_is_ascii) { - inline for (comptime std.enums.values(SupportedAttribute)) |t| { - if (strings.eqlComptime(p.lexer.string_literal_slice, @tagName(t))) { - break :brk t; - } + const string_literal_text = (try p.lexer.toUTF8EString()).slice8(); + inline for (comptime std.enums.values(SupportedAttribute)) |t| { + if (strings.eqlComptime(string_literal_text, @tagName(t))) { + break :brk t; } } } else { @@ -12198,44 +12198,43 @@ fn NewParser_( try p.lexer.expect(.t_colon); try p.lexer.expect(.t_string_literal); - if (p.lexer.string_literal_is_ascii) { - if (supported_attribute) |attr| { - switch (attr) { - .type => { - const type_attr = p.lexer.string_literal_slice; - if (strings.eqlComptime(type_attr, "macro")) { - path.is_macro = true; - } else if (strings.eqlComptime(type_attr, "sqlite")) { - path.import_tag = .with_type_sqlite; - if (has_seen_embed_true) { - path.import_tag = .with_type_sqlite_embedded; - } - } else if (strings.eqlComptime(type_attr, "json")) { - path.import_tag = .with_type_json; - } else if (strings.eqlComptime(type_attr, "toml")) { - path.import_tag = .with_type_toml; - } else if (strings.eqlComptime(type_attr, "text")) { - path.import_tag = .with_type_text; - } else if (strings.eqlComptime(type_attr, "file")) { - path.import_tag = .with_type_file; - } - }, - .embed => { - if (strings.eqlComptime(p.lexer.string_literal_slice, "true")) { - has_seen_embed_true = true; - if (path.import_tag == .with_type_sqlite) { - path.import_tag = .with_type_sqlite_embedded; - } + const string_literal_text = (try p.lexer.toUTF8EString()).slice8(); + if (supported_attribute) |attr| { + switch (attr) { + .type => { + const type_attr = string_literal_text; + if (strings.eqlComptime(type_attr, "macro")) { + path.is_macro = true; + } else if (strings.eqlComptime(type_attr, "sqlite")) { + path.import_tag = .with_type_sqlite; + if (has_seen_embed_true) { + path.import_tag = .with_type_sqlite_embedded; } - }, - .bunBakeGraph => { - if (strings.eqlComptime(p.lexer.string_literal_slice, "ssr")) { - path.import_tag = .bake_resolve_to_ssr_graph; - } else { - try p.lexer.addRangeError(p.lexer.range(), "'bunBakeGraph' can only be set to 'ssr'", .{}, true); + } else if (strings.eqlComptime(type_attr, "json")) { + path.import_tag = .with_type_json; + } else if (strings.eqlComptime(type_attr, "toml")) { + path.import_tag = .with_type_toml; + } else if (strings.eqlComptime(type_attr, "text")) { + path.import_tag = .with_type_text; + } else if (strings.eqlComptime(type_attr, "file")) { + path.import_tag = .with_type_file; + } + }, + .embed => { + if (strings.eqlComptime(string_literal_text, "true")) { + has_seen_embed_true = true; + if (path.import_tag == .with_type_sqlite) { + path.import_tag = .with_type_sqlite_embedded; } - }, - } + } + }, + .bunBakeGraph => { + if (strings.eqlComptime(string_literal_text, "ssr")) { + path.import_tag = .bake_resolve_to_ssr_graph; + } else { + try p.lexer.addRangeError(p.lexer.range(), "'bunBakeGraph' can only be set to 'ssr'", .{}, true); + } + }, } } @@ -13788,7 +13787,7 @@ fn NewParser_( try p.lexer.rescanCloseBraceAsTemplateToken(); const tail: E.Template.Contents = brk: { - if (!include_raw) break :brk .{ .cooked = p.lexer.toEString() }; + if (!include_raw) break :brk .{ .cooked = try p.lexer.toEString() }; break :brk .{ .raw = p.lexer.rawTemplateContents() }; }; @@ -13814,7 +13813,7 @@ fn NewParser_( // This assumes the caller has already checked for TStringLiteral or TNoSubstitutionTemplateLiteral pub fn parseStringLiteral(p: *P) anyerror!Expr { const loc = p.lexer.loc(); - var str = p.lexer.toEString(); + var str = try p.lexer.toEString(); str.prefer_template = p.lexer.token == .t_no_substitution_template_literal; const expr = p.newExpr(str, loc); @@ -14899,7 +14898,7 @@ fn NewParser_( return try p.parseStringLiteral(); }, .t_template_head => { - const head = p.lexer.toEString(); + const head = try p.lexer.toEString(); const parts = try p.parseTemplateParts(false); @@ -15486,7 +15485,7 @@ fn NewParser_( try p.lexer.nextInsideJSXElement(); if (p.lexer.token == .t_string_literal) { previous_string_with_backslash_loc.start = @max(p.lexer.loc().start, p.lexer.previous_backslash_quote_in_jsx.loc.start); - const expr = p.newExpr(p.lexer.toEString(), previous_string_with_backslash_loc.*); + const expr = p.newExpr(try p.lexer.toEString(), previous_string_with_backslash_loc.*); try p.lexer.nextInsideJSXElement(); return expr; @@ -15622,7 +15621,7 @@ fn NewParser_( //
// note: template literals are not supported, operations on strings are not supported either T.t_string_literal => { - const key = p.newExpr(p.lexer.toEString(), p.lexer.loc()); + const key = p.newExpr(try p.lexer.toEString(), p.lexer.loc()); try p.lexer.next(); try props.append(G.Property{ .value = key, .key = key, .kind = .normal }); }, @@ -15695,7 +15694,7 @@ fn NewParser_( while (true) { switch (p.lexer.token) { .t_string_literal => { - try children.append(p.newExpr(p.lexer.toEString(), loc)); + try children.append(p.newExpr(try p.lexer.toEString(), loc)); try p.lexer.nextJSXElementChild(); }, .t_open_brace => { @@ -18683,12 +18682,11 @@ fn NewParser_( }, // TODO: e_inlined_enum -> .e_string -> "length" should inline the length .e_string => |str| { - // Disable until https://github.com/oven-sh/bun/issues/4217 is fixed - if (comptime FeatureFlags.minify_javascript_string_length) { - if (p.options.features.minify_syntax) { - // minify "long-string".length to 11 - if (strings.eqlComptime(name, "length")) { - return p.newExpr(E.Number{ .value = @floatFromInt(str.javascriptLength()) }, loc); + if (p.options.features.minify_syntax) { + // minify "long-string".length to 11 + if (strings.eqlComptime(name, "length")) { + if (str.javascriptLength()) |len| { + return p.newExpr(E.Number{ .value = @floatFromInt(len) }, loc); } } } diff --git a/src/js_printer.zig b/src/js_printer.zig index 0ffdf9ef47f688..cddee7fa5677be 100644 --- a/src/js_printer.zig +++ b/src/js_printer.zig @@ -82,9 +82,9 @@ pub fn writeModuleId(comptime Writer: type, writer: Writer, module_id: u32) void pub fn canPrintWithoutEscape(comptime CodePointType: type, c: CodePointType, comptime ascii_only: bool) bool { if (c <= last_ascii) { - return c >= first_ascii and c != '\\' and c != '"'; + return c >= first_ascii and c != '\\' and c != '"' and c != '\'' and c != '`' and c != '$'; } else { - return !ascii_only and c != 0xFEFF and (c < first_high_surrogate or c > last_low_surrogate); + return !ascii_only and c != 0xFEFF and c != 0x2028 and c != 0x2029 and (c < first_high_surrogate or c > last_low_surrogate); } } @@ -95,9 +95,8 @@ pub fn bestQuoteCharForString(comptime Type: type, str: []const Type, allow_back var single_cost: usize = 0; var double_cost: usize = 0; var backtick_cost: usize = 0; - var char: u8 = 0; var i: usize = 0; - while (i < str.len) { + while (i < @min(str.len, 1024)) { switch (str[i]) { '\'' => { single_cost += 1; @@ -108,10 +107,9 @@ pub fn bestQuoteCharForString(comptime Type: type, str: []const Type, allow_back '`' => { backtick_cost += 1; }, - '\r', '\n' => { - if (allow_backtick) { - return '`'; - } + '\n' => { + single_cost += 1; + double_cost += 1; }, '\\' => { i += 1; @@ -126,18 +124,13 @@ pub fn bestQuoteCharForString(comptime Type: type, str: []const Type, allow_back i += 1; } - char = '"'; - if (double_cost > single_cost) { - char = '\''; - - if (single_cost > backtick_cost and allow_backtick) { - char = '`'; - } - } else if (double_cost > backtick_cost and allow_backtick) { - char = '`'; + if (allow_backtick and backtick_cost < @min(single_cost, double_cost)) { + return '`'; } - - return char; + if (single_cost < double_cost) { + return '\''; + } + return '"'; } const Whitespacer = struct { @@ -170,11 +163,11 @@ fn ws(comptime str: []const u8) Whitespacer { return .{ .normal = Static.with, .minify = Static.without }; } -pub fn estimateLengthForJSON(input: []const u8, comptime ascii_only: bool) usize { +pub fn estimateLengthForUTF8(input: []const u8, comptime ascii_only: bool, comptime quote_char: u8) usize { var remaining = input; var len: usize = 2; // for quotes - while (strings.indexOfNeedsEscape(remaining)) |i| { + while (strings.indexOfNeedsEscape(remaining, quote_char)) |i| { len += i; remaining = remaining[i..]; const char_len = strings.wtf8ByteSequenceLengthWithInvalid(remaining[0]); @@ -212,222 +205,163 @@ pub fn quoteForJSON(text: []const u8, output_: MutableString, comptime ascii_onl return bytes; } -pub fn quoteForJSONBuffer(text: []const u8, bytes: *MutableString, comptime ascii_only: bool) !void { - try bytes.growIfNeeded(estimateLengthForJSON(text, ascii_only)); - try bytes.appendChar('"'); +pub fn writePreQuotedString(text_in: []const u8, comptime Writer: type, writer: Writer, comptime quote_char: u8, comptime ascii_only: bool, comptime json: bool, comptime encoding: strings.Encoding) !void { + const text = if (comptime encoding == .utf16) @as([]const u16, @alignCast(std.mem.bytesAsSlice(u16, text_in))) else text_in; var i: usize = 0; const n: usize = text.len; while (i < n) { - const width = strings.wtf8ByteSequenceLengthWithInvalid(text[i]); + const width = switch (comptime encoding) { + .latin1, .ascii => 1, + .utf8 => strings.wtf8ByteSequenceLengthWithInvalid(text[i]), + .utf16 => 1, + }; const clamped_width = @min(@as(usize, width), n -| i); - const c = strings.decodeWTF8RuneT( - &switch (clamped_width) { - // 0 is not returned by `wtf8ByteSequenceLengthWithInvalid` - 1 => .{ text[i], 0, 0, 0 }, - 2 => text[i..][0..2].* ++ .{ 0, 0 }, - 3 => text[i..][0..3].* ++ .{0}, - 4 => text[i..][0..4].*, - else => unreachable, + const c = switch (encoding) { + .utf8 => strings.decodeWTF8RuneT( + &switch (clamped_width) { + // 0 is not returned by `wtf8ByteSequenceLengthWithInvalid` + 1 => .{ text[i], 0, 0, 0 }, + 2 => text[i..][0..2].* ++ .{ 0, 0 }, + 3 => text[i..][0..3].* ++ .{0}, + 4 => text[i..][0..4].*, + else => unreachable, + }, + width, + i32, + 0, + ), + .ascii => brk: { + std.debug.assert(text[i] <= 0x7F); + break :brk text[i]; }, - width, - i32, - 0, - ); + .latin1 => brk: { + if (text[i] <= 0x7F) break :brk text[i]; + break :brk strings.latin1ToCodepointAssumeNotASCII(text[i], i32); + }, + .utf16 => brk: { + // TODO: if this is a part of a surrogate pair, we could parse the whole codepoint in order + // to emit it as a single \u{result} rather than two paired \uLOW\uHIGH. + // eg: "\u{10334}" will convert to "\uD800\uDF34" without this. + break :brk @as(i32, text[i]); + }, + }; if (canPrintWithoutEscape(i32, c, ascii_only)) { const remain = text[i + clamped_width ..]; - if (strings.indexOfNeedsEscape(remain)) |j| { - const text_chunk = text[i .. i + clamped_width]; - try bytes.appendSlice(text_chunk); - i += clamped_width; - try bytes.appendSlice(remain[0..j]); - i += j; - continue; - } else { - try bytes.appendSlice(text[i..]); - i = n; - break; + + switch (encoding) { + .ascii, .utf8 => { + if (strings.indexOfNeedsEscape(remain, quote_char)) |j| { + const text_chunk = text[i .. i + clamped_width]; + try writer.writeAll(text_chunk); + i += clamped_width; + try writer.writeAll(remain[0..j]); + i += j; + } else { + try writer.writeAll(text[i..]); + i = n; + break; + } + }, + .latin1, .utf16 => { + var codepoint_bytes: [4]u8 = undefined; + const codepoint_len = strings.encodeWTF8Rune(codepoint_bytes[0..4], c); + try writer.writeAll(codepoint_bytes[0..codepoint_len]); + i += clamped_width; + }, } + continue; } switch (c) { 0x07 => { - try bytes.appendSlice("\\x07"); + try writer.writeAll("\\x07"); i += 1; }, 0x08 => { - try bytes.appendSlice("\\b"); + try writer.writeAll("\\b"); i += 1; }, 0x0C => { - try bytes.appendSlice("\\f"); + try writer.writeAll("\\f"); i += 1; }, '\n' => { - try bytes.appendSlice("\\n"); + if (quote_char == '`') { + try writer.writeAll("\n"); + } else { + try writer.writeAll("\\n"); + } i += 1; }, std.ascii.control_code.cr => { - try bytes.appendSlice("\\r"); + try writer.writeAll("\\r"); i += 1; }, // \v std.ascii.control_code.vt => { - try bytes.appendSlice("\\v"); + try writer.writeAll("\\v"); i += 1; }, // "\\" '\\' => { - try bytes.appendSlice("\\\\"); + try writer.writeAll("\\\\"); i += 1; }, '"' => { - try bytes.appendSlice("\\\""); + if (quote_char == '"') { + try writer.writeAll("\\\""); + } else { + try writer.writeAll("\""); + } i += 1; }, - - '\t' => { - try bytes.appendSlice("\\t"); + '\'' => { + if (quote_char == '\'') { + try writer.writeAll("\\'"); + } else { + try writer.writeAll("'"); + } i += 1; }, - - else => { - i += @as(usize, width); - - if (c < 0xFFFF) { - const k = @as(usize, @intCast(c)); - bytes.ensureUnusedCapacity(6) catch unreachable; - const old = bytes.list.items.len; - bytes.list.items.len += 6; - - bytes.list.items[old .. old + 6].ptr[0..6].* = [_]u8{ - '\\', - 'u', - hex_chars[(k >> 12) & 0xF], - hex_chars[(k >> 8) & 0xF], - hex_chars[(k >> 4) & 0xF], - hex_chars[k & 0xF], - }; + '`' => { + if (quote_char == '`') { + try writer.writeAll("\\`"); } else { - bytes.ensureUnusedCapacity(12) catch unreachable; - const old = bytes.list.items.len; - bytes.list.items.len += 12; - - const k = c - 0x10000; - const lo = @as(usize, @intCast(first_high_surrogate + ((k >> 10) & 0x3FF))); - const hi = @as(usize, @intCast(first_low_surrogate + (k & 0x3FF))); - - bytes.list.items[old .. old + 12][0..12].* = [_]u8{ - '\\', - 'u', - hex_chars[lo >> 12], - hex_chars[(lo >> 8) & 15], - hex_chars[(lo >> 4) & 15], - hex_chars[lo & 15], - '\\', - 'u', - hex_chars[hi >> 12], - hex_chars[(hi >> 8) & 15], - hex_chars[(hi >> 4) & 15], - hex_chars[hi & 15], - }; + try writer.writeAll("`"); } + i += 1; }, - } - } - bytes.appendChar('"') catch unreachable; -} - -pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, comptime encoding: strings.Encoding) !void { - try writer.writeAll("\""); - var text = input; - const end = text.ptr + text.len; - if (comptime encoding == .utf16) { - @compileError("not implemented yet"); - } - - while (text.ptr != end) { - const width = if (comptime encoding == .latin1 or encoding == .ascii) - 1 - else - strings.wtf8ByteSequenceLengthWithInvalid(text[0]); - - const c: i32 = if (comptime encoding == .utf8) - strings.decodeWTF8RuneT(text.ptr[0..4], width, i32, 0) - else brk: { - const char = text[0]; - if (char <= 0x7F) { - break :brk char; - } else break :brk strings.latin1ToCodepointAssumeNotASCII(char, i32); - }; - if (canPrintWithoutEscape(i32, c, false)) { - const remain = text[width..]; - if (encoding != .utf8 and width > 0) { - var codepoint_bytes: [4]u8 = undefined; - std.mem.writeInt(i32, &codepoint_bytes, c, .little); - try writer.writeAll( - codepoint_bytes[0..strings.encodeWTF8Rune(codepoint_bytes[0..4], c)], - ); - } else if (encoding == .utf8) { - try writer.writeAll(text[0..width]); - } - - if (strings.indexOfNeedsEscape(remain)) |j| { - try writer.writeAll(remain[0..j]); - text = remain[j..]; - continue; - } else { - try writer.writeAll(remain); - break; - } - } - switch (c) { - // Special-case the bell character since it may cause dumping this file to - // the terminal to make a sound, which is undesirable. Note that we can't - // use an octal literal to print this shorter since octal literals are not - // allowed in strict mode (or in template strings). - 0x07 => { - try writer.writeAll("\\x07"); - text = text[1..]; - }, - 0x08 => { - try writer.writeAll("\\b"); - text = text[1..]; - }, - 0x0C => { - try writer.writeAll("\\f"); - text = text[1..]; - }, - '\n' => { - try writer.writeAll("\\n"); - text = text[1..]; - }, - std.ascii.control_code.cr => { - try writer.writeAll("\\r"); - text = text[1..]; - }, - // \v - std.ascii.control_code.vt => { - try writer.writeAll("\\v"); - text = text[1..]; - }, - // "\\" - '\\' => { - try writer.writeAll("\\\\"); - text = text[1..]; - }, - '"' => { - try writer.writeAll("\\\""); - text = text[1..]; + '$' => { + if (quote_char == '`') { + const remain = text[i + clamped_width ..]; + if (remain.len > 0 and remain[0] == '{') { + try writer.writeAll("\\$"); + } else { + try writer.writeAll("$"); + } + } else { + try writer.writeAll("$"); + } + i += 1; }, '\t' => { try writer.writeAll("\\t"); - text = text[1..]; + i += 1; }, else => { - text = text[@as(usize, width)..]; + i += @as(usize, width); - if (c < 0xFFFF) { + if (c < 0xFF and !json) { + const k = @as(usize, @intCast(c)); + + try writer.writeAll(&[_]u8{ + '\\', + 'x', + hex_chars[(k >> 4) & 0xF], + hex_chars[k & 0xF], + }); + } else if (c < 0xFFFF) { const k = @as(usize, @intCast(c)); try writer.writeAll(&[_]u8{ @@ -461,6 +395,19 @@ pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, }, } } +} +pub fn quoteForJSONBuffer(text: []const u8, bytes: *MutableString, comptime ascii_only: bool) !void { + const writer = bytes.writer(); + + try bytes.growIfNeeded(estimateLengthForUTF8(text, ascii_only, '"')); + try bytes.appendChar('"'); + try writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, true, .utf8); + bytes.appendChar('"') catch unreachable; +} + +pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, comptime encoding: strings.Encoding) !void { + try writer.writeAll("\""); + try writePreQuotedString(input, Writer, writer, '"', false, true, encoding); try writer.writeAll("\""); } @@ -986,6 +933,9 @@ fn NewPrinter( p.writer.print(@TypeOf(span), span); }, else => { + if (Environment.allow_assert and ascii_only) { + for (str) |char| std.debug.assert(char > 0 and char < 0x80); + } p.writer.print(StringType, str); }, } @@ -1427,7 +1377,7 @@ fn NewPrinter( p.printSpaceBeforeIdentifier(); p.printIdentifier(alias); } else { - p.printQuotedUTF8(alias, false); + p.printStringLiteralUTF8(alias, false); } } @@ -1641,228 +1591,25 @@ fn NewPrinter( p.fmt("{d}", .{float}) catch {}; } - pub fn printQuotedUTF16(e: *Printer, text: []const u16, quote: u8) void { - var i: usize = 0; - const n: usize = text.len; - - outer: while (i < n) { - const CodeUnitType = u32; - - const c: CodeUnitType = text[i]; - i += 1; - - switch (c) { - - // Special-case the null character since it may mess with code written in C - // that treats null characters as the end of the string. - 0x00 => { - // We don't want "\x001" to be written as "\01" - if (i < n and text[i] >= '0' and text[i] <= '9') { - e.print("\\x00"); - } else { - e.print("\\0"); - } - }, - - // Special-case the bell character since it may cause dumping this file to - // the terminal to make a sound, which is undesirable. Note that we can't - // use an octal literal to print this shorter since octal literals are not - // allowed in strict mode (or in template strings). - 0x07 => { - e.print("\\x07"); - }, - 0x08 => { - if (quote == '`') - e.print(0x08) - else - e.print("\\b"); - }, - 0x0C => { - if (quote == '`') - e.print(0x000C) - else - e.print("\\f"); - }, - '\t' => { - if (quote == '`') - e.print("\t") - else - e.print("\\t"); - }, - '\n' => { - if (quote == '`') { - e.print('\n'); - } else { - e.print("\\n"); - } - }, - // we never print \r un-escaped - std.ascii.control_code.cr => { - e.print("\\r"); - }, - // \v - std.ascii.control_code.vt => { - if (quote == '`') { - e.print(std.ascii.control_code.vt); - } else { - e.print("\\v"); - } - }, - // "\\" - '\\' => { - e.print("\\\\"); - }, - - '\'' => { - if (quote == '\'') { - e.print('\\'); - } - e.print("'"); - }, - - '"' => { - if (quote == '"') { - e.print('\\'); - } - - e.print("\""); - }, - '`' => { - if (quote == '`') { - e.print('\\'); - } - - e.print("`"); - }, - '$' => { - if (quote == '`' and i < n and text[i] == '{') { - e.print('\\'); - } - - e.print('$'); - }, - 0x2028 => { - e.print("\\u2028"); - }, - 0x2029 => { - e.print("\\u2029"); - }, - 0xFEFF => { - e.print("\\uFEFF"); - }, - - else => { - switch (c) { - first_ascii...last_ascii => { - e.print(@as(u8, @intCast(c))); - - // Fast path for printing long UTF-16 template literals - // this only applies to template literal strings - // but we print a template literal if there is a \n or a \r - // which is often if the string is long and UTF-16 - if (quote == '`') { - const remain = text[i..]; - if (remain.len > 1 and remain[0] < last_ascii and remain[0] > first_ascii and - remain[0] != '$' and - remain[0] != '\\' and - remain[0] != '`') - { - if (strings.@"nextUTF16NonASCIIOr$`\\"([]const u16, remain)) |count_| { - if (count_ == 0) - unreachable; // conditional above checks this - - const len = count_ - 1; - i += len; - var ptr = e.writer.reserve(len) catch unreachable; - const to_copy = ptr[0..len]; - - strings.copyU16IntoU8(to_copy, []const u16, remain[0..len]); - e.writer.advance(len); - continue :outer; - } else { - const count = @as(u32, @truncate(remain.len)); - var ptr = e.writer.reserve(count) catch unreachable; - const to_copy = ptr[0..count]; - strings.copyU16IntoU8(to_copy, []const u16, remain); - e.writer.advance(count); - i += count; - } - } - } - }, - first_high_surrogate...last_high_surrogate => { - - // Is there a next character? - - if (i < n) { - const c2: CodeUnitType = text[i]; - - if (c2 >= first_low_surrogate and c2 <= last_low_surrogate) { - i += 1; - - // Escape this character if UTF-8 isn't allowed - if (ascii_only_always_on_unless_minifying) { - var ptr = e.writer.reserve(12) catch unreachable; - ptr[0..12].* = [_]u8{ - '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15], - '\\', 'u', hex_chars[c2 >> 12], hex_chars[(c2 >> 8) & 15], hex_chars[(c2 >> 4) & 15], hex_chars[c2 & 15], - }; - e.writer.advance(12); - - continue; - // Otherwise, encode to UTF-8 - } - - const r: CodeUnitType = 0x10000 + (((c & 0x03ff) << 10) | (c2 & 0x03ff)); - - var ptr = e.writer.reserve(4) catch unreachable; - e.writer.advance(strings.encodeWTF8RuneT(ptr[0..4], CodeUnitType, r)); - continue; - } - } - - // Write an unpaired high surrogate - var ptr = e.writer.reserve(6) catch unreachable; - ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] }; - e.writer.advance(6); - }, - // Is this an unpaired low surrogate or four-digit hex escape? - first_low_surrogate...last_low_surrogate => { - // Write an unpaired high surrogate - var ptr = e.writer.reserve(6) catch unreachable; - ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] }; - e.writer.advance(6); - }, - else => { - if (ascii_only_always_on_unless_minifying) { - if (c > 0xFF) { - var ptr = e.writer.reserve(6) catch unreachable; - // Write an unpaired high surrogate - ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] }; - e.writer.advance(6); - } else { - // Can this be a two-digit hex escape? - var ptr = e.writer.reserve(4) catch unreachable; - ptr[0..4].* = [_]u8{ '\\', 'x', hex_chars[c >> 4], hex_chars[c & 15] }; - e.writer.advance(4); - } - } else { - // chars < 255 as two digit hex escape - if (c <= 0xFF) { - var ptr = e.writer.reserve(4) catch unreachable; - ptr[0..4].* = [_]u8{ '\\', 'x', hex_chars[c >> 4], hex_chars[c & 15] }; - e.writer.advance(4); - continue; - } + pub fn printStringCharactersUTF8(e: *Printer, text: []const u8, quote: u8) void { + const writer = e.writer.stdWriter(); + (switch (quote) { + '\'' => writePreQuotedString(text, @TypeOf(writer), writer, '\'', ascii_only, false, .utf8), + '"' => writePreQuotedString(text, @TypeOf(writer), writer, '"', ascii_only, false, .utf8), + '`' => writePreQuotedString(text, @TypeOf(writer), writer, '`', ascii_only, false, .utf8), + else => unreachable, + }) catch |err| switch (err) {}; + } + pub fn printStringCharactersUTF16(e: *Printer, text: []const u16, quote: u8) void { + const slice = std.mem.sliceAsBytes(text); - var ptr = e.writer.reserve(4) catch return; - e.writer.advance(strings.encodeWTF8RuneT(ptr[0..4], CodeUnitType, c)); - } - }, - } - }, - } - } + const writer = e.writer.stdWriter(); + (switch (quote) { + '\'' => writePreQuotedString(slice, @TypeOf(writer), writer, '\'', ascii_only, false, .utf16), + '"' => writePreQuotedString(slice, @TypeOf(writer), writer, '"', ascii_only, false, .utf16), + '`' => writePreQuotedString(slice, @TypeOf(writer), writer, '`', ascii_only, false, .utf16), + else => unreachable, + }) catch |err| switch (err) {}; } pub fn isUnboundEvalIdentifier(p: *Printer, value: Expr) bool { @@ -1884,9 +1631,9 @@ fn NewPrinter( } pub fn printRequireError(p: *Printer, text: string) void { - p.print("(()=>{throw new Error(`Cannot require module "); - p.printQuotedUTF8(text, false); - p.print("`);})()"); + p.print("(()=>{throw new Error(\"Cannot require module \"+"); + p.printStringLiteralUTF8(text, false); + p.print(");})()"); } pub inline fn importRecord( @@ -2009,9 +1756,7 @@ fn NewPrinter( p.print(".require("); { const path = input_files[record.source_index.get()].path; - p.print('"'); - p.printUTF8StringEscapedQuotes(path.pretty, '"'); - p.print('"'); + p.printStringLiteralUTF8(path.pretty, false); } p.print(")"); } else if (!meta.was_unwrapped_require) { @@ -2084,9 +1829,7 @@ fn NewPrinter( p.print(".require("); { const path = record.path; - p.print('"'); - p.printUTF8StringEscapedQuotes(path.pretty, '"'); - p.print('"'); + p.printStringLiteralUTF8(path.pretty, false); } p.print(")"); return; @@ -2156,14 +1899,22 @@ fn NewPrinter( p.printWhitespacer(ws("/* @__PURE__ */ ")); } - pub fn printQuotedUTF8(p: *Printer, str: string, allow_backtick: bool) void { + pub fn printStringLiteralEString(p: *Printer, str: *E.String, allow_backtick: bool) void { + const quote = bestQuoteCharForEString(str, allow_backtick); + p.print(quote); + p.printStringCharactersEString(str, quote); + p.print(quote); + } + pub fn printStringLiteralUTF8(p: *Printer, str: string, allow_backtick: bool) void { + if (Environment.allow_assert) std.debug.assert(std.unicode.wtf8ValidateSlice(str)); + const quote = if (comptime !is_json) bestQuoteCharForString(u8, str, allow_backtick) else '"'; p.print(quote); - p.print(str); + p.printStringCharactersUTF8(str, quote); p.print(quote); } @@ -2179,9 +1930,10 @@ fn NewPrinter( const name = p.renamer.nameForSymbol(item.name.ref.?); if (comptime as == .import) { - p.printClauseAlias(item.alias); - - if (!strings.eql(name, item.alias)) { + if (strings.eql(name, item.alias)) { + p.printIdentifier(name); + } else { + p.printClauseAlias(item.alias); p.print(" as "); p.addSourceMapping(item.alias_loc); p.printIdentifier(name); @@ -2208,16 +1960,6 @@ fn NewPrinter( } } - pub inline fn canPrintIdentifier(_: *Printer, name: string) bool { - if (comptime is_json) return false; - - if (comptime ascii_only or ascii_only_always_on_unless_minifying) { - return js_lexer.isLatin1Identifier(string, name); - } else { - return js_lexer.isIdentifier(name); - } - } - pub inline fn canPrintIdentifierUTF16(_: *Printer, name: []const u16) bool { if (comptime ascii_only or ascii_only_always_on_unless_minifying) { return js_lexer.isLatin1Identifier([]const u16, name); @@ -2414,12 +2156,12 @@ fn NewPrinter( p.printSymbol(p.options.commonjs_named_exports_ref); } - if (p.canPrintIdentifier(key)) { + if (js_lexer.isIdentifier(key)) { p.print("."); p.print(key); } else { p.print("["); - p.printPossiblyEscapedIdentifierString(key, true); + p.printStringLiteralUTF8(key, false); p.print("]"); } } else { @@ -2602,7 +2344,7 @@ fn NewPrinter( } p.print("("); - p.printQuotedUTF8(p.importRecord(e.import_record_index).path.text, true); + p.printStringLiteralUTF8(p.importRecord(e.import_record_index).path.text, true); p.print(")"); if (wrap) { @@ -2694,7 +2436,7 @@ fn NewPrinter( flags, ); - if (p.canPrintIdentifier(e.name)) { + if (js_lexer.isIdentifier(e.name)) { if (isOptionalChain) { p.print("?."); } else { @@ -2715,10 +2457,7 @@ fn NewPrinter( p.print("["); } - p.printPossiblyEscapedIdentifierString( - e.name, - true, - ); + p.printStringLiteralUTF8(e.name, false); p.print("]"); } @@ -3011,20 +2750,12 @@ fn NewPrinter( // If this was originally a template literal, print it as one as long as we're not minifying if (e.prefer_template and !p.options.minify_syntax) { p.print("`"); - p.printStringContent(e, '`'); + p.printStringCharactersEString(e, '`'); p.print("`"); return; } - const c = bestQuoteCharForEString(e, true); - - p.print(c); - p.printStringContent(e, c); - p.print(c); - }, - .e_utf8_string => |e| { - p.addSourceMapping(expr.loc); - quoteForJSONBuffer(e.data, p.writer.getMutableBuffer(), ascii_only) catch bun.outOfMemory(); + p.printStringLiteralEString(e, true); }, .e_template => |e| { if (e.tag) |tag| { @@ -3047,7 +2778,7 @@ fn NewPrinter( .cooked => |*cooked| { if (cooked.isPresent()) { cooked.resolveRopeIfNeeded(p.options.allocator); - p.printStringContent(cooked, '`'); + p.printStringCharactersEString(cooked, '`'); } }, } @@ -3061,7 +2792,7 @@ fn NewPrinter( .cooked => |*cooked| { if (cooked.isPresent()) { cooked.resolveRopeIfNeeded(p.options.allocator); - p.printStringContent(cooked, '`'); + p.printStringCharactersEString(cooked, '`'); } }, } @@ -3166,7 +2897,7 @@ fn NewPrinter( p.addSourceMapping(expr.loc); p.printSymbol(namespace.namespace_ref); const alias = namespace.alias; - if (p.canPrintIdentifier(alias)) { + if (js_lexer.isIdentifier(alias)) { p.print("."); // TODO: addSourceMappingForName p.printIdentifier(alias); @@ -3174,7 +2905,7 @@ fn NewPrinter( p.print("["); // TODO: addSourceMappingForName // p.addSourceMappingForName(alias); - p.printPossiblyEscapedIdentifierString(alias, true); + p.printStringLiteralUTF8(alias, false); p.print("]"); } @@ -3362,87 +3093,11 @@ fn NewPrinter( } // This assumes the string has already been quoted. - pub fn printStringContent(p: *Printer, str: *const E.String, c: u8) void { + pub fn printStringCharactersEString(p: *Printer, str: *const E.String, c: u8) void { if (!str.isUTF8()) { - // its already quoted for us! - p.printQuotedUTF16(str.slice16(), c); + p.printStringCharactersUTF16(str.slice16(), c); } else { - p.printUTF8StringEscapedQuotes(str.data, c); - } - } - - // Add one outer branch so the inner loop does fewer branches - pub fn printUTF8StringEscapedQuotes(p: *Printer, str: string, c: u8) void { - switch (c) { - '`' => _printUTF8StringEscapedQuotes(p, str, '`'), - '"' => _printUTF8StringEscapedQuotes(p, str, '"'), - '\'' => _printUTF8StringEscapedQuotes(p, str, '\''), - else => unreachable, - } - } - - pub fn _printUTF8StringEscapedQuotes(p: *Printer, str: string, comptime c: u8) void { - var utf8 = str; - var i: usize = 0; - // Walk the string searching for quote characters - // Escape any we find - // Skip over already-escaped strings - var len = utf8.len; - while (i < len) { - switch (utf8[i]) { - '\\' => i += 2, - '$' => { - if (comptime c == '`') { - p.print(utf8[0..i]); - p.print("\\$"); - utf8 = utf8[i + 1 ..]; - len = utf8.len; - i = 0; - } else { - i += 1; - } - }, - c => { - p.print(utf8[0..i]); - p.print("\\" ++ &[_]u8{c}); - utf8 = utf8[i + 1 ..]; - len = utf8.len; - i = 0; - }, - - else => i += 1, - } - } - if (utf8.len > 0) { - p.print(utf8); - } - } - - fn printBindingIdentifierName(p: *Printer, name: string, name_loc: logger.Loc) void { - p.addSourceMapping(name_loc); - - if (comptime !is_json and ascii_only) { - const quote = bestQuoteCharForString(u8, name, false); - p.print(quote); - p.printQuotedIdentifier(name); - p.print(quote); - } else { - p.printQuotedUTF8(name, false); - } - } - - fn printPossiblyEscapedIdentifierString(p: *Printer, name: string, allow_backtick: bool) void { - if (comptime !ascii_only or is_json) { - p.printQuotedUTF8(name, allow_backtick); - } else { - const quote = if (comptime !is_json) - bestQuoteCharForString(u8, name, allow_backtick) - else - '"'; - - p.print(quote); - p.printQuotedIdentifier(name); - p.print(quote); + p.printStringCharactersUTF8(str.data, c); } } @@ -3456,12 +3111,12 @@ fn NewPrinter( // that means the namespace alias is empty if (namespace.alias.len == 0) return; - if (p.canPrintIdentifier(namespace.alias)) { + if (js_lexer.isIdentifier(namespace.alias)) { p.print("."); p.printIdentifier(namespace.alias); } else { p.print("["); - p.printPossiblyEscapedIdentifierString(namespace.alias, true); + p.printStringLiteralUTF8(namespace.alias, false); p.print("]"); } } @@ -3687,11 +3342,11 @@ fn NewPrinter( // While each of those property keys are ASCII, a subset of ASCII is valid as the start of an identifier // "=" and ":" are not valid // So we need to check - if (p.canPrintIdentifier(key.data)) { - p.print(key.data); + if (!is_json and js_lexer.isIdentifier(key.data)) { + p.printIdentifier(key.data); } else { allow_shorthand = false; - p.printBindingIdentifierName(key.data, logger.Loc.Empty); + p.printStringLiteralEString(key, false); } // Use a shorthand property if the names are the same @@ -3728,7 +3383,7 @@ fn NewPrinter( else => {}, } } - } else if (p.canPrintIdentifierUTF16(key.slice16())) { + } else if (!is_json and p.canPrintIdentifierUTF16(key.slice16())) { p.printSpaceBeforeIdentifier(); p.printIdentifierUTF16(key.slice16()) catch unreachable; @@ -3771,7 +3426,7 @@ fn NewPrinter( } else { const c = bestQuoteCharForString(u16, key.slice16(), false); p.print(c); - p.printQuotedUTF16(key.slice16(), c); + p.printStringCharactersUTF16(key.slice16(), c); p.print(c); } }, @@ -3929,7 +3584,7 @@ fn NewPrinter( // ^ // That needs to be: // "aria-label": ariaLabel, - if (p.canPrintIdentifier(str.data)) { + if (js_lexer.isIdentifier(str.data)) { p.printIdentifier(str.data); // Use a shorthand property if the names are the same @@ -3943,7 +3598,7 @@ fn NewPrinter( else => {}, } } else { - p.printPossiblyEscapedIdentifierString(str.data, false); + p.printStringLiteralUTF8(str.data, false); } } else if (p.canPrintIdentifierUTF16(str.slice16())) { p.printSpaceBeforeIdentifier(); @@ -4633,9 +4288,9 @@ fn NewPrinter( }, .auto_onimportcss, .facade_onimportcss => { - p.print("globalThis.document?.dispatchEvent(new CustomEvent(\"onimportcss\", {detail: \""); - p.print(record.path.text); - p.print("\"}));\n"); + p.print("globalThis.document?.dispatchEvent(new CustomEvent(\"onimportcss\", {detail: "); + p.printStringLiteralUTF8(record.path.text, false); + p.print("}));\n"); // If they actually use the code, then we emit a facade that just echos whatever they write if (s.default_name) |name| { @@ -4886,7 +4541,7 @@ fn NewPrinter( p.printIndent(); p.printSpaceBeforeIdentifier(); - p.printQuotedUTF8(s.value, false); + p.printStringLiteralUTF8(s.value, false); p.printSemicolonAfterStatement(); }, .s_break => |s| { @@ -4957,13 +4612,13 @@ fn NewPrinter( const quote = bestQuoteCharForString(u8, import_record.path.text, false); if (import_record.print_namespace_in_path and !import_record.path.isFile()) { p.print(quote); - p.print(import_record.path.namespace); + p.printStringCharactersUTF8(import_record.path.namespace, quote); p.print(":"); - p.printIdentifier(import_record.path.text); + p.printStringCharactersUTF8(import_record.path.text, quote); p.print(quote); } else { p.print(quote); - p.printIdentifier(import_record.path.text); + p.printStringCharactersUTF8(import_record.path.text, quote); p.print(quote); } } @@ -5107,7 +4762,7 @@ fn NewPrinter( p.print("Object.defineProperty("); p.printModuleExportSymbol(); p.print(","); - p.printQuotedUTF8(name, true); + p.printStringLiteralUTF8(name, true); p.printWhitespacer(ws(",{get: () => (")); p.printLoadFromBundle(import_record_index); @@ -5124,7 +4779,7 @@ fn NewPrinter( p.print("Object.defineProperty("); p.printModuleExportSymbol(); p.print(","); - p.printQuotedUTF8(name, true); + p.printStringLiteralUTF8(name, true); p.print(",{get: () => "); p.printIdentifier(identifier); p.print(", enumerable: true, configurable: true})"); @@ -5377,13 +5032,13 @@ fn NewPrinter( pub fn printIdentifier(p: *Printer, identifier: string) void { if (comptime ascii_only) { - p.printQuotedIdentifier(identifier); + p.printIdentifierAsciiOnly(identifier); } else { p.print(identifier); } } - fn printQuotedIdentifier(p: *Printer, identifier: string) void { + fn printIdentifierAsciiOnly(p: *Printer, identifier: string) void { var ascii_start: usize = 0; var is_ascii = false; var iter = CodepointIterator.init(identifier); @@ -5612,6 +5267,14 @@ pub fn NewWriter( }; } + pub fn stdWriter(self: *Self) std.io.Writer(*Self, error{}, stdWriterWrite) { + return .{ .context = self }; + } + pub fn stdWriterWrite(self: *Self, bytes: []const u8) error{}!usize { + self.print([]const u8, bytes); + return bytes.len; + } + pub fn isCopyFileRangeSupported() bool { return comptime std.meta.hasFn(ContextType, "copyFileRange"); } @@ -6335,9 +5998,7 @@ pub fn printWithWriterAndPlatform( if (opts.module_type == .internal_bake_dev) { printer.indent(); printer.printIndent(); - printer.print('"'); - printer.printUTF8StringEscapedQuotes(source.path.pretty, '"'); - printer.print('"'); + printer.printStringLiteralUTF8(source.path.pretty, false); printer.printFunc(parts[0].stmts[0].data.s_expr.value.data.e_function.func); printer.print(",\n"); } else { diff --git a/src/json_parser.zig b/src/json_parser.zig index 4998d86f0e1d2e..4d30b2d09018f2 100644 --- a/src/json_parser.zig +++ b/src/json_parser.zig @@ -115,7 +115,6 @@ fn JSONLikeParser(comptime opts: js_lexer.JSONOptions) type { opts.ignore_trailing_escape_sequences, opts.json_warn_duplicate_keys, opts.was_originally_macro, - opts.always_decode_escape_sequences, opts.guess_indentation, ); } @@ -128,7 +127,6 @@ fn JSONLikeParser_( comptime opts_ignore_trailing_escape_sequences: bool, comptime opts_json_warn_duplicate_keys: bool, comptime opts_was_originally_macro: bool, - comptime opts_always_decode_escape_sequences: bool, comptime opts_guess_indentation: bool, ) type { const opts = js_lexer.JSONOptions{ @@ -139,7 +137,6 @@ fn JSONLikeParser_( .ignore_trailing_escape_sequences = opts_ignore_trailing_escape_sequences, .json_warn_duplicate_keys = opts_json_warn_duplicate_keys, .was_originally_macro = opts_was_originally_macro, - .always_decode_escape_sequences = opts_always_decode_escape_sequences, .guess_indentation = opts_guess_indentation, }; return struct { @@ -193,7 +190,7 @@ fn JSONLikeParser_( return newExpr(E.Null{}, loc); }, .t_string_literal => { - var str: E.String = p.lexer.toEString(); + var str: E.String = try p.lexer.toEString(); if (comptime force_utf8) { str.toUTF8(p.allocator) catch unreachable; } @@ -282,9 +279,9 @@ fn JSONLikeParser_( } const str = if (comptime force_utf8) - p.lexer.toUTF8EString() + try p.lexer.toUTF8EString() else - p.lexer.toEString(); + try p.lexer.toEString(); const key_range = p.lexer.range(); const key = newExpr(str, key_range.loc); @@ -297,7 +294,7 @@ fn JSONLikeParser_( // Warn about duplicate keys if (duplicate_get_or_put.found_existing) { - p.log.addRangeWarningFmt(p.source(), key_range, p.allocator, "Duplicate key \"{s}\" in object literal", .{p.lexer.string_literal_slice}) catch unreachable; + p.log.addRangeWarningFmt(p.source(), key_range, p.allocator, "Duplicate key \"{s}\" in object literal", .{try str.string(p.allocator)}) catch unreachable; } } @@ -419,7 +416,7 @@ pub const PackageJSONVersionChecker = struct { return newExpr(E.Null{}, loc); }, .t_string_literal => { - const str: E.String = p.lexer.toEString(); + const str: E.String = try p.lexer.toEString(); try p.lexer.next(); return newExpr(str, loc); @@ -466,7 +463,7 @@ pub const PackageJSONVersionChecker = struct { } } - const str = p.lexer.toEString(); + const str = try p.lexer.toEString(); const key_range = p.lexer.range(); const key = newExpr(str, key_range.loc); @@ -770,7 +767,6 @@ pub fn parsePackageJSONUTF8( var parser = try JSONLikeParser(.{ .is_json = true, - .always_decode_escape_sequences = false, .allow_comments = true, .allow_trailing_commas = true, }).init(allocator, source.*, log); @@ -806,7 +802,6 @@ pub fn parsePackageJSONUTF8AlwaysDecode( var parser = try JSONLikeParser(.{ .is_json = true, - .always_decode_escape_sequences = true, .allow_comments = true, .allow_trailing_commas = true, }).init(allocator, source.*, log); diff --git a/src/sourcemap/sourcemap.zig b/src/sourcemap/sourcemap.zig index dc5cbdb8ac3e9d..997a41708cbeff 100644 --- a/src/sourcemap/sourcemap.zig +++ b/src/sourcemap/sourcemap.zig @@ -179,10 +179,7 @@ pub fn parseJSON( if (item.data != .e_string) return error.InvalidSourceMap; - const utf16_decode = try bun.js_lexer.decodeStringLiteralEscapeSequencesToUTF16(item.data.e_string.string(arena) catch bun.outOfMemory(), arena); - defer arena.free(utf16_decode); - source_paths_slice.?[i] = bun.strings.toUTF8Alloc(alloc, utf16_decode) catch - return error.InvalidSourceMap; + source_paths_slice.?[i] = try alloc.dupe(u8, try item.data.e_string.string(alloc)); i += 1; }; @@ -229,11 +226,7 @@ pub fn parseJSON( break :content null; } - const utf16_decode = try bun.js_lexer.decodeStringLiteralEscapeSequencesToUTF16(str, arena); - defer arena.free(utf16_decode); - - break :content bun.strings.toUTF8Alloc(alloc, utf16_decode) catch - return error.InvalidSourceMap; + break :content try alloc.dupe(u8, str); } else null; return .{ diff --git a/src/string_immutable.zig b/src/string_immutable.zig index d62af2b977cfa0..1ec7a13fca4719 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -2152,6 +2152,20 @@ pub fn convertUTF16ToUTF8(list_: std.ArrayList(u8), comptime Type: type, utf16: return list; } +pub fn convertUTF16ToUTF8WithoutInvalidSurrogatePairs(list_: std.ArrayList(u8), comptime Type: type, utf16: Type) !std.ArrayList(u8) { + var list = list_; + const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le( + utf16, + list.items.ptr[0..list.capacity], + ); + if (result.status == .surrogate) { + return error.SurrogatePair; + } + + list.items.len = result.count; + return list; +} + pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !void { const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le( utf16, @@ -2167,6 +2181,20 @@ pub fn convertUTF16ToUTF8Append(list: *std.ArrayList(u8), utf16: []const u16) !v list.items.len += result.count; } +pub fn toUTF8AllocWithTypeWithoutInvalidSurrogatePairs(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 { + if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) { + const length = bun.simdutf.length.utf8.from.utf16.le(utf16); + // add 16 bytes of padding for SIMDUTF + var list = try std.ArrayList(u8).initCapacity(allocator, length + 16); + list = try convertUTF16ToUTF8(list, Type, utf16); + return list.items; + } + + var list = try std.ArrayList(u8).initCapacity(allocator, utf16.len); + list = try toUTF8ListWithType(list, Type, utf16); + return list.items; +} + pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 { if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) { const length = bun.simdutf.length.utf8.from.utf16.le(utf16); @@ -4230,21 +4258,30 @@ pub fn containsNewlineOrNonASCIIOrQuote(slice_: []const u8) bool { return false; } -pub fn indexOfNeedsEscape(slice: []const u8) ?u32 { +pub fn indexOfNeedsEscape(slice: []const u8, comptime quote_char: u8) ?u32 { var remaining = slice; if (remaining.len == 0) return null; - if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == '"') { + if (remaining[0] >= 127 or remaining[0] < 0x20 or remaining[0] == '\\' or remaining[0] == quote_char or (quote_char == '`' and remaining[0] == '$')) { return 0; } if (comptime Environment.enableSIMD) { while (remaining.len >= ascii_vector_size) { const vec: AsciiVector = remaining[0..ascii_vector_size].*; - const cmp = @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) | + const cmp: AsciiVectorU1 = if (comptime quote_char == '`') ( // + @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | + @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) | @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) | - @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '"'))))); + @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) | + @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '$'))))) // + ) else ( // + @as(AsciiVectorU1, @bitCast((vec > max_16_ascii))) | + @as(AsciiVectorU1, @bitCast((vec < min_16_ascii))) | + @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, '\\'))))) | + @as(AsciiVectorU1, @bitCast(vec == @as(AsciiVector, @splat(@as(u8, quote_char))))) // + ); if (@reduce(.Max, cmp) > 0) { const bitmask = @as(AsciiVectorInt, @bitCast(cmp)); @@ -4259,7 +4296,7 @@ pub fn indexOfNeedsEscape(slice: []const u8) ?u32 { for (remaining) |*char_| { const char = char_.*; - if (char > 127 or char < 0x20 or char == '\\' or char == '"') { + if (char > 127 or char < 0x20 or char == '\\' or char == quote_char or (quote_char == '`' and char == '$')) { return @as(u32, @truncate(@intFromPtr(char_) - @intFromPtr(slice.ptr))); } } diff --git a/src/toml/toml_lexer.zig b/src/toml/toml_lexer.zig index 34f3646de11871..5984f33d26cfc4 100644 --- a/src/toml/toml_lexer.zig +++ b/src/toml/toml_lexer.zig @@ -1184,7 +1184,7 @@ pub const Lexer = struct { } return js_ast.Expr.init( - js_ast.E.UTF8String, + js_ast.E.String, .{ .data = lexer.string_literal_slice }, loc_, ); diff --git a/test/bundler/bundler_minify.test.ts b/test/bundler/bundler_minify.test.ts index 9919ed6cf76839..8d07f3727c1f3f 100644 --- a/test/bundler/bundler_minify.test.ts +++ b/test/bundler/bundler_minify.test.ts @@ -3,9 +3,6 @@ import { itBundled } from "./expectBundled"; describe("bundler", () => { itBundled("minify/TemplateStringFolding", { - // TODO: https://github.com/oven-sh/bun/issues/4217 - todo: true, - files: { "/entry.js": /* js */ ` capture(\`\${1}-\${2}-\${3}-\${null}-\${undefined}-\${true}-\${false}\`); @@ -28,6 +25,11 @@ describe("bundler", () => { capture(\`😋📋👌\`.length == 6) capture(\`😋📋👌\`.length === 2) capture(\`😋📋👌\`.length == 2) + capture(\`\\n\`.length) + capture(\`\n\`.length) + capture("\\uD800\\uDF34".length) + capture("\\u{10334}".length) + capture("𐌴".length) `, }, capture: [ @@ -51,6 +53,11 @@ describe("bundler", () => { "!0", "!1", "!1", + "1", + "1", + "2", + "2", + "2", ], minifySyntax: true, target: "bun", @@ -475,9 +482,11 @@ describe("bundler", () => { capture(+'-123.567'); capture(+'8.325'); capture(+'100000000'); - // unsupported capture(+'\\u0030\\u002e\\u0031'); capture(+'\\x30\\x2e\\x31'); + capture(+'NotANumber'); + // not supported + capture(+'æ'); `, }, minifySyntax: true, @@ -486,9 +495,11 @@ describe("bundler", () => { "-123.567", "8.325", "1e8", + "0.1", + "0.1", + "NaN", // untouched - "+\"0.1\"", - "+\"0.1\"", + '+"æ"', ], }); }); diff --git a/test/bundler/bundler_npm.test.ts b/test/bundler/bundler_npm.test.ts index 73d4b1556ef709..58eb0aa8f27d12 100644 --- a/test/bundler/bundler_npm.test.ts +++ b/test/bundler/bundler_npm.test.ts @@ -58,16 +58,16 @@ describe("bundler", () => { ], mappings: [ ["react.development.js:524:'getContextName'", "1:5426:Y1"], - ["react.development.js:2495:'actScopeDepth'", "1:26051:GJ++"], + ["react.development.js:2495:'actScopeDepth'", "23:4092:GJ++"], ["react.development.js:696:''Component'", '1:7488:\'Component "%s"'], - ["entry.tsx:6:'\"Content-Type\"'", '1:221651:"Content-Type"'], - ["entry.tsx:11:''", "1:221905:void"], - ["entry.tsx:23:'await'", "1:222005:await"], + ["entry.tsx:6:'\"Content-Type\"'", '100:18849:"Content-Type"'], + ["entry.tsx:11:''", "100:19103:void"], + ["entry.tsx:23:'await'", "100:19203:await"], ], }, }, expectExactFilesize: { - "out/entry.js": 222273, + "out/entry.js": 222164, }, run: { stdout: "

Hello World

This is an example.

", diff --git a/test/bundler/bundler_string.test.ts b/test/bundler/bundler_string.test.ts index 2b5901d7826863..88efba7780bc53 100644 --- a/test/bundler/bundler_string.test.ts +++ b/test/bundler/bundler_string.test.ts @@ -11,7 +11,7 @@ interface TemplateStringTest { const templateStringTests: Record = { // note for writing tests: .print is .trim()'ed due to how run.stdout works Empty: { expr: '""', captureRaw: '""' }, - NullByte: { expr: '"hello\0"', captureRaw: '"hello\0"' }, + NullByte: { expr: '"hello\0"', captureRaw: '"hello\\x00"' }, EmptyTemplate: { expr: "``", captureRaw: '""' }, ConstantTemplate: { expr: "`asdf`", captureRaw: '"asdf"' }, AddConstant: { expr: "`${7 + 6}`", capture: true }, @@ -61,15 +61,15 @@ const templateStringTests: Record = { }, TernaryWithEscapeVariable: { expr: '`${"1"}\\${${VARIABLE ? "SOMETHING" : ""}`', - captureRaw: '`${"1"}\\${${VARIABLE?"SOMETHING":""}`', + captureRaw: '`1\\${${VARIABLE?"SOMETHING":""}`', }, TernaryWithEscapeTrue: { expr: '`${"1"}\\${${true ? "SOMETHING" : ""}`', - captureRaw: '`${"1"}\\${${"SOMETHING"}`', + captureRaw: '"1${SOMETHING"', }, TernaryWithEscapeFalse: { expr: '`${"1"}\\${${false ? "SOMETHING" : ""}`', - captureRaw: '`${"1"}\\${${""}`', + captureRaw: '"1${"', }, Fold: { expr: "`a${'b'}c${'d'}e`", capture: true }, FoldNested1: { expr: "`a${`b`}c${`${'d'}`}e`", capture: true }, diff --git a/test/bundler/expectBundled.ts b/test/bundler/expectBundled.ts index 50607cf44f0599..19e77a4481415d 100644 --- a/test/bundler/expectBundled.ts +++ b/test/bundler/expectBundled.ts @@ -574,7 +574,9 @@ function expectBundled( const entryPaths = entryPoints.map(file => path.join(root, file)); if (external) { - external = external.map(x => (typeof x !== "string" ? x : x.replace(/\{\{root\}\}/g, root))); + external = external.map(x => + typeof x !== "string" ? x : x.replaceAll("{{root}}", root.replaceAll("\\", "\\\\")), + ); } if (generateOutput === false) outputPaths = []; @@ -625,7 +627,9 @@ function expectBundled( const filename = path.join(root, file); mkdirSync(path.dirname(filename), { recursive: true }); const formattedContents = - typeof contents === "string" ? dedent(contents).replace(/\{\{root\}\}/g, root) : contents; + typeof contents === "string" + ? dedent(contents).replaceAll("{{root}}", root.replaceAll("\\", "\\\\")) + : contents; writeFileSync(filename, formattedContents); } @@ -1350,7 +1354,9 @@ for (const [key, blob] of build.outputs) { for (const [file, contents] of Object.entries(runtimeFiles ?? {})) { mkdirSync(path.dirname(path.join(root, file)), { recursive: true }); const formattedContents = - typeof contents === "string" ? dedent(contents).replace(/\{\{root\}\}/g, root) : contents; + typeof contents === "string" + ? dedent(contents).replaceAll("{{root}}", root.replaceAll("\\", "\\\\")) + : contents; writeFileSync(path.join(root, file), formattedContents); } diff --git a/test/bundler/transpiler/transpiler.test.js b/test/bundler/transpiler/transpiler.test.js index 2763f2b2af9ed6..7bb5bb1987c6a4 100644 --- a/test/bundler/transpiler/transpiler.test.js +++ b/test/bundler/transpiler/transpiler.test.js @@ -1668,8 +1668,33 @@ console.log(
);`), expectPrinted_(`import("./foo.json", { type: "json" });`, `import("./foo.json")`); }); - it("import with unicode escape", () => { - expectPrinted_(`import { name } from 'mod\\u1011';`, `import { name } from "mod\\u1011"`); + it("import with unicode", () => { + expectPrinted_(`import { name } from 'modထ';`, `import { name } from "modထ"`); + expectPrinted_(`import { name } from 'mod\\u1011';`, `import { name } from "modထ"`); + expectPrinted_(`import('modထ');`, `import("modထ")`); + expectPrinted_(`import('mod\\u1011');`, `import("modထ")`); + }); + it("import with quote", () => { + expectPrinted_(`import { name } from '".ts';`, `import { name } from '".ts'`); + }); + + it("string quote selection", () => { + expectPrinted_(`console.log("\\n")`, "console.log(`\n`)"); + expectPrinted_(`console.log("\\"")`, `console.log('"')`); + expectPrinted_(`console.log('\\'')`, `console.log("'")`); + expectPrinted_("console.log(`\\`hi\\``)", "console.log(`\\`hi\\``)"); + expectPrinted_(`console.log("ထ")`, `console.log("ထ")`); + expectPrinted_(`console.log("\\u1011")`, `console.log("ထ")`); + }); + + it("unicode surrogates", () => { + expectPrinted_(`console.log("𐌴")`, 'console.log("\\uD800\\uDF34")'); + expectPrinted_(`console.log("\\u{10334}")`, 'console.log("\\uD800\\uDF34")'); + expectPrinted_(`console.log("\\uD800\\uDF34")`, 'console.log("\\uD800\\uDF34")'); + expectPrinted_(`console.log("\\u{10334}" === "\\uD800\\uDF34")`, "console.log(true)"); + expectPrinted_(`console.log("\\u{10334}" === "\\uDF34\\uD800")`, "console.log(false)"); + expectPrintedMin_(`console.log("abc" + "def")`, 'console.log("abcdef")'); + expectPrintedMin_(`console.log("\\uD800" + "\\uDF34")`, 'console.log("\\uD800" + "\\uDF34")'); }); it("fold string addition", () => { @@ -1810,7 +1835,7 @@ export const { dead } = { dead: "hello world!" }; expect(bunTranspiler.transformSync(input, object).trim()).toBe(output); }); - it.skip("rewrite string to length", () => { + it("rewrite string to length", () => { expectBunPrinted_(`export const foo = "a".length + "b".length;`, `export const foo = 2`); // check rope string expectBunPrinted_(`export const foo = ("a" + "b").length;`, `export const foo = 2`); @@ -1819,6 +1844,8 @@ export const { dead } = { dead: "hello world!" }; `export const foo = "😋 Get Emoji — All Emojis to ✂️ Copy and 📋 Paste 👌".length;`, `export const foo = 52`, ); + // no rope string for non-ascii + expectBunPrinted_(`export const foo = ("æ" + "™").length;`, `export const foo = ("æ" + "™").length`); }); describe("Bun.js", () => { diff --git a/test/cli/install/bun-run.test.ts b/test/cli/install/bun-run.test.ts index 717836b687299c..ab3ca924289e30 100644 --- a/test/cli/install/bun-run.test.ts +++ b/test/cli/install/bun-run.test.ts @@ -521,3 +521,23 @@ it("should pass arguments correctly in scripts", async () => { expect(exitCode).toBe(0); } }); + +it("should run with bun instead of npm even with leading spaces", async () => { + const dir = tempDirWithFiles("test", { + "package.json": JSON.stringify({ + workspaces: ["a", "b"], + scripts: { "root_script": " npm run other_script ", "other_script": " echo hi " }, + }), + }); + { + const { stdout, stderr, exitCode } = spawnSync({ + cmd: [bunExe(), "run", "root_script"], + cwd: dir, + env: bunEnv, + }); + + expect(stderr.toString()).toBe("$ bun run other_script \n$ echo hi \n"); + expect(stdout.toString()).toEndWith("hi\n"); + expect(exitCode).toBe(0); + } +}); diff --git a/test/js/bun/ini/ini.test.ts b/test/js/bun/ini/ini.test.ts index 32da995aba743d..14667b673ff1da 100644 --- a/test/js/bun/ini/ini.test.ts +++ b/test/js/bun/ini/ini.test.ts @@ -48,6 +48,13 @@ wow = 'hi' expected: { hi: "\\production" }, }); + envVarTest({ + name: "backslashes", + ini: "filepath=C:\\Home\\someuser\\My Documents\nfilepath2=\\\\\\\\TwoBackslashes", + env: {}, + expected: { filepath: "C:\\Home\\someuser\\My Documents", filepath2: "\\\\TwoBackslashes" }, + }); + envVarTest({ name: "basic", ini: /* ini */ ` diff --git a/test/regression/issue/14976/14976.test.ts b/test/regression/issue/14976/14976.test.ts new file mode 100644 index 00000000000000..37e7c72df0672e --- /dev/null +++ b/test/regression/issue/14976/14976.test.ts @@ -0,0 +1,77 @@ +import { mile𐃘add1 } from "./import_target"; +import { mile𐃘add1 as m } from "./import_target"; +import * as i from "./import_target"; +import { test, expect } from "bun:test"; +import { $ } from "bun"; +import { bunExe, tempDirWithFiles } from "harness"; + +test("unicode imports", () => { + expect(mile𐃘add1(25)).toBe(26); + expect(i.mile𐃘add1(25)).toBe(26); + expect(m(25)).toBe(26); +}); + +test("more unicode imports", async () => { + const dir = tempDirWithFiles("more-unicode-imports", { + "mod_importer.ts": ` + import { nထme as nထme𐃘1 } from "./mod\\u1011.ts"; + import { nထme as nထme𐃘2 } from "./modထ.ts"; + + console.log(nထme𐃘1, nထme𐃘2); + `, + "modထ.ts": ` + export const nထme = "𐃘1"; + `, + }); + expect((await $`${bunExe()} run ${dir}/mod_importer.ts`.text()).trim()).toBe("𐃘1 𐃘1"); + console.log(await $`${bunExe()} build --target=bun ${dir}/mod_importer.ts`.text()); + console.log(await $`${bunExe()} build --target=node ${dir}/mod_importer.ts`.text()); +}); + +// prettier-ignore +test("escaped unicode variable name", () => { + let mile\u{100d8}value = 36; + expect(mile𐃘value).toBe(36); + expect(mile\u{100d8}value).toBe(36); +}); + +test("bun build --target=bun outputs only ascii", async () => { + const build_result = await Bun.build({ + entrypoints: [import.meta.dirname + "/import_target.ts"], + target: "bun", + }); + expect(build_result.success).toBe(true); + expect(build_result.outputs.length).toBe(1); + for (const byte of new Uint8Array(await build_result.outputs[0].arrayBuffer())) { + expect(byte).toBeLessThan(0x80); + } +}); + +test("string escapes", () => { + expect({ ["mile𐃘add1"]: 1 }?.mile𐃘add1).toBe(1); + expect(`\\ ' " \` $ 𐃘`).toBe([0x5c, 0x27, 0x22, 0x60, 0x24, 0x100d8].map(c => String.fromCodePoint(c)).join(" ")); + expect({ "\\": 1 }[String.fromCodePoint(0x5c)]).toBe(1); + const tag = (a: TemplateStringsArray) => a.raw; + expect(tag`$one \$two`).toEqual(["$one \\$two"]); +}); + +test("constant-folded equals doesn't lie", async () => { + expect( + "\n" === + ` +`, + ).toBe(true); + // prettier-ignore + expect( + "\a\n" === + `a +`, + ).toBe(true); + // prettier-ignore + console.log("\"" === '"'); +}); + +test.skip("template literal raw property with unicode in an ascii-only build", async () => { + expect(String.raw`你好𐃘\\`).toBe("你好𐃘\\\\"); + expect((await $`echo 你好𐃘`.text()).trim()).toBe("你好𐃘"); +}); diff --git a/test/regression/issue/14976/import_target.ts b/test/regression/issue/14976/import_target.ts new file mode 100644 index 00000000000000..b1b9a61f149c9f --- /dev/null +++ b/test/regression/issue/14976/import_target.ts @@ -0,0 +1,2 @@ +"use𐃘unicode"; +export const mile𐃘add1 = (int: number) => int + 1;