Skip to content

Commit

Permalink
Upgrade SIMDUTF, 8x faster atob (#11085)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner authored May 15, 2024
1 parent 5389c7a commit b5dff55
Show file tree
Hide file tree
Showing 8 changed files with 6,799 additions and 1,074 deletions.
13 changes: 13 additions & 0 deletions bench/snippets/atob.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { bench, run } from "./runner.mjs";

/**
 * Registers one benchmark case that calls `atob` on the base64 encoding of
 * `size` repeated "A" characters.
 *
 * @param {number} size Number of "A" characters to encode before decoding.
 */
function makeBenchmark(size) {
  // Build the payload once, outside the benchmarked callback.
  const encodedPayload = btoa("A".repeat(size));
  const label = `atob(${size} chars)`;

  bench(label, () => {
    atob(encodedPayload);
  });
}

// Register one benchmark per input size, then execute everything registered.
const inputSizes = [32, 512, 64 * 1024, 512 * 1024, 1024 * 1024 * 8];
for (const inputSize of inputSizes) {
  makeBenchmark(inputSize);
}

await run();
53 changes: 53 additions & 0 deletions src/bun.js/bindings/Base64Helpers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

#include "root.h"
#include "simdutf.h"

#include "ExceptionOr.h"

namespace Bun {

namespace Base64 {

using namespace WebCore;

// Decodes a base64 string on behalf of the `atob` global.
//
// encodedString: the base64 text. A 16-bit string is first narrowed to
// Latin-1 and then decoded through the 8-bit path.
//
// Returns the decoded bytes as an 8-bit string (one character per byte), or a
// null String when the input is empty. Fails with:
//   - InvalidCharacterError when the 16-bit input cannot be narrowed to
//     Latin-1, or when simdutf reports any base64 decoding error;
//   - OutOfMemoryError when a buffer for the result cannot be allocated.
ExceptionOr<String> atob(const String& encodedString)
{
    if (encodedString.isEmpty())
        return String();

    if (!encodedString.is8Bit()) {
        const auto span = encodedString.span16();
        const size_t expected_length = simdutf::latin1_length_from_utf16(span.size());
        LChar* ptr = nullptr;
        // Use the fallible allocator: String::createUninitialized never hands
        // back a null string (it crashes on allocation failure), which would
        // make the OutOfMemoryError branch below unreachable.
        WTF::String convertedString(WTF::StringImpl::tryCreateUninitialized(expected_length, ptr));
        if (UNLIKELY(convertedString.isNull())) {
            return WebCore::Exception { OutOfMemoryError };
        }

        const auto result = simdutf::convert_utf16le_to_latin1_with_errors(span.data(), span.size(), reinterpret_cast<char*>(ptr));

        // A code unit that does not fit in Latin-1 cannot be base64 either.
        if (result.error != simdutf::error_code::SUCCESS) {
            return WebCore::Exception { InvalidCharacterError };
        }
        return atob(convertedString);
    }

    const auto span = encodedString.span8();
    const char* input = reinterpret_cast<const char*>(span.data());
    // Upper bound on the decoded size; the real size is known only after decoding.
    const size_t result_length = simdutf::maximal_binary_length_from_base64(input, span.size());
    LChar* ptr = nullptr;
    // Fallible allocation, for the same reason as in the 16-bit path above.
    WTF::String outString(WTF::StringImpl::tryCreateUninitialized(result_length, ptr));
    if (UNLIKELY(outString.isNull())) {
        return WebCore::Exception { OutOfMemoryError };
    }

    const auto result = simdutf::base64_to_binary(input, span.size(), reinterpret_cast<char*>(ptr), simdutf::base64_default);
    if (result.error != simdutf::error_code::SUCCESS) {
        return WebCore::Exception { InvalidCharacterError };
    }

    // Fewer bytes than the upper bound were written: expose only those bytes.
    if (result.count != result_length) {
        return outString.substringSharingImpl(0, result.count);
    }

    return outString;
}
}
}
10 changes: 10 additions & 0 deletions src/bun.js/bindings/Base64Helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#pragma once

#include "root.h"
#include "ExceptionOr.h"

namespace Bun {

namespace Base64 {

// Decodes the base64 text in `encodedString`.
// Returns the decoded string on success, or a WebCore::Exception on failure.
WebCore::ExceptionOr<WTF::String> atob(const WTF::String& encodedString);

}
}
21 changes: 2 additions & 19 deletions src/bun.js/bindings/ZigGlobalObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
#include "ZigGeneratedClasses.h"
#include "ZigSourceProvider.h"
#include "UtilInspect.h"

#include "Base64Helpers.h"
#if ENABLE(REMOTE_INSPECTOR)
#include "JavaScriptCore/RemoteInspectorServer.h"
#endif
Expand Down Expand Up @@ -190,23 +190,6 @@ static bool has_loaded_jsc = false;

Structure* createMemoryFootprintStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject);

namespace WebCore {

// Static helper wrapping base64DecodeToString for the atob host function.
class Base64Utilities {
public:
    // Decodes `encodedString` with the DefaultValidatePaddingAndIgnoreWhitespace
    // decode mode. A null input yields a default-constructed String; a failed
    // decode yields InvalidCharacterError.
    static ExceptionOr<String> atob(const String& encodedString)
    {
        if (!encodedString.isNull()) {
            auto decoded = base64DecodeToString(encodedString, Base64DecodeMode::DefaultValidatePaddingAndIgnoreWhitespace);
            if (decoded)
                return decoded;

            return Exception { InvalidCharacterError };
        }

        return String();
    }
};

}
extern "C" WebCore::Worker* WebWorker__getParentWorker(void*);
extern "C" void JSCInitialize(const char* envp[], size_t envc, void (*onCrash)(const char* ptr, size_t length))
{
Expand Down Expand Up @@ -1523,7 +1506,7 @@ JSC_DEFINE_HOST_FUNCTION(functionATOB,
WTF::String encodedString = callFrame->uncheckedArgument(0).toWTFString(globalObject);
RETURN_IF_EXCEPTION(throwScope, JSC::JSValue::encode(JSC::JSValue {}));

auto result = WebCore::Base64Utilities::atob(encodedString);
auto result = Bun::Base64::atob(encodedString);
if (result.hasException()) {
throwException(globalObject, throwScope, createDOMException(*globalObject, result.releaseException()));
return JSC::JSValue::encode(JSC::JSValue {});
Expand Down
7 changes: 7 additions & 0 deletions src/bun.js/bindings/bun-simdutf.zig
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ pub const SIMDUTFResult = extern struct {
/// a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16)
too_large = 5,
surrogate = 6,

/// Found a character that cannot be part of a valid base64 string.
invalid_base64_character = 7,
/// The base64 input terminates with a single character, excluding padding (=).
base64_input_remainder = 8,
/// The provided buffer is too small.
output_buffer_too_small = 9,
/// Not related to validation/transcoding.
_,
};
Expand Down
Loading

0 comments on commit b5dff55

Please sign in to comment.