diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml
index d1f69b9e..8993cfaa 100644
--- a/.github/workflows/artifact.yaml
+++ b/.github/workflows/artifact.yaml
@@ -71,11 +71,12 @@ jobs:
     env:
       CC: "clang"
       CFLAGS: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm"
-      LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow"
-      RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=2 -D warnings"
+      LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs"
+      RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=4 -D warnings"
       PATH: "/__w/orjson/orjson/.venv/bin:/github/home/.cargo/bin:/root/.local/bin:/root/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
     container:
       image: fedora:41
+      options: --cap-add SYS_ADMIN
     steps:
 
     - name: cpuinfo
@@ -83,7 +84,7 @@ jobs:
 
     - name: Build environment pre-clone
       run: |
-        dnf install -y rustup clang lld python${{ matrix.python.version }} git
+        dnf install -y rustup clang lld python${{ matrix.python.version }} git llvm-bolt zip unzip perf
         rustup-init --default-toolchain "${RUST_TOOLCHAIN}-x86_64-unknown-linux-gnu" --profile minimal --component rust-src -y
 
     - uses: actions/checkout@v4
@@ -113,6 +114,13 @@ jobs:
       env:
         PYTHONMALLOC: "debug"
 
+    - name: bolt
+      run: |
+        source .venv/bin/activate
+        ./script/bolt
+        auditwheel show $(find target/wheels/*.whl) | grep '"manylinux_2_17_x86_64"'
+        uv pip install target/wheels/orjson*.whl
+
     - run: source .venv/bin/activate && ./integration/run thread
     - run: source .venv/bin/activate && ./integration/run http
     - run: source .venv/bin/activate && ./integration/run init
@@ -124,7 +132,7 @@ jobs:
       if: matrix.python.version != '3.8'
 
     - name: Store wheels
-      if: "startsWith(github.ref, 'refs/tags/')"
+      # if: "startsWith(github.ref, 'refs/tags/')"
       uses: actions/upload-artifact@v4
       with:
         name: orjson_manylinux_2_17_amd64_${{ matrix.python.version }}
diff --git a/.gitignore b/.gitignore
index fdf3c2de..77922014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,6 @@
 /include/cargo
 /perf.*
 /target
+/tmp
 /vendor
 __pycache__
diff --git a/script/bolt b/script/bolt
new file mode 100755
index 00000000..920cbf4e
--- /dev/null
+++ b/script/bolt
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+# Optimize the extension module of the built wheel with llvm-bolt: instrument the
+# installed copy, run ./script/profdata to collect a profile, apply the profile to
+# the wheel's copy, then fix RECORD and repack the wheel.
+# usage: ./script/bolt [strip]
+
+set -eou pipefail
+
+# Same default as script/develop; without it `set -u` aborts when unset.
+CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}"
+
+rm -f ./perf.fdata
+rm -rf tmp
+
+wheel=$(find "${CARGO_TARGET_DIR}/wheels" -type f -name "orjson*.whl" -exec basename {} \;)
+
+unzip -q -o "${CARGO_TARGET_DIR}/wheels/${wheel}" -d tmp
+sharedob=$(find tmp/orjson -type f -name "orjson*.so" -exec basename {} \;)
+
+installedobj=$(find .venv -type f -name "orjson*.so")
+llvm-bolt "${installedobj}" --instrument --o orjson.so --jump-tables=none --align-macro-fusion=none
+mv orjson.so "${installedobj}"
+
+echo "collecting perf data..."
+
+./script/profdata
+mv /tmp/prof.fdata ./perf.fdata
+
+llvm-bolt \
+  "tmp/orjson/${sharedob}" \
+  --o "${sharedob}" \
+  --data=perf.fdata \
+  --strict \
+  --use-gnu-stack \
+  --assume-abi \
+  --deterministic-debuginfo \
+  --sequential-disassembly \
+  --update-debug-sections \
+  --use-old-text \
+  --relocs \
+  --hot-data \
+  --cg-from-perf-data \
+  --cg-ignore-recursive-calls \
+  --inline-ap \
+  --inline-small-functions \
+  --inline-memcpy \
+  --eliminate-unreachable \
+  --plt=all \
+  --peepholes=all \
+  --indirect-call-promotion=all \
+  --icf \
+  --icp-eliminate-loads \
+  --jump-tables=move \
+  --reg-reassign \
+  --use-aggr-reg-reassign \
+  --align-macro-fusion=all \
+  --frame-opt=all \
+  --frame-opt-rm-stores \
+  --sctc-mode=always \
+  --simplify-rodata-loads \
+  --simplify-conditional-tail-calls \
+  --split-all-cold \
+  --reorder-data-inplace \
+  --reorder-blocks=ext-tsp \
+  --reorder-functions=hfsort+ \
+  --reorder-functions-use-hot-size \
+  --strip-rep-ret \
+  --x86-align-branch-boundary-hot-only \
+  --x86-strip-redundant-address-size \
+  --dyno-stats
+
+# "$*" makes the joining of all arguments explicit for the substring match.
+if [[ "$*" == *"strip"* ]]; then
+    strip --strip-debug "${sharedob}"
+fi
+
+mv -f "${sharedob}" tmp/orjson
+
+# The module changed, so its hash and size in RECORD must be rewritten.
+./script/fix-dist-info "$(find tmp -type f -name "RECORD")" "$(find tmp -type f -name "orjson*.so")"
+
+cd tmp
+zip -9 -r "${wheel}" -xi *
+cd ..
+mv -f "tmp/${wheel}" "${CARGO_TARGET_DIR}/wheels"
diff --git a/script/develop b/script/develop
index a6003285..cc80c50a 100755
--- a/script/develop
+++ b/script/develop
@@ -12,8 +12,8 @@ export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}"
 echo "CC: ${CC}, LD: ${LD}, LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
 
 export CFLAGS="-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm"
-export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow"
-export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"
+export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs"
+export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"
 
 maturin build --target="${TARGET}" "$@"
 
diff --git a/script/fix-dist-info b/script/fix-dist-info
new file mode 100755
index 00000000..aa228d49
--- /dev/null
+++ b/script/fix-dist-info
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+# usage:
+# ./script/fix-dist-info $(find tmp -type f -name "RECORD") $(find tmp -type f -name "orjson*.so")
+"""Rewrite the RECORD entry (hash and size) of the orjson shared object."""
+
+import base64
+import hashlib
+import pathlib
+import sys
+
+
+def main() -> int:
+    """Update RECORD (argv[1]) for the shared object (argv[2]); return the exit code."""
+    if len(sys.argv) != 3:
+        print(f"usage: {sys.argv[0]} RECORD SHARED_OBJECT", file=sys.stderr)
+        return 2
+
+    record_path = pathlib.Path(sys.argv[1])
+    so_path = pathlib.Path(sys.argv[2])
+    so_name = so_path.name
+
+    print(f"Fixing hash of {so_path} in {record_path} ...")
+
+    so_bytes = so_path.read_bytes()
+    so_len = len(so_bytes)
+
+    # URL_SAFE_NO_PAD.encode(Sha256::digest(bytes));
+    so_hash = (
+        base64.urlsafe_b64encode(hashlib.sha256(so_bytes).digest())
+        .rstrip(b"=")
+        .decode("utf-8")
+    )
+    new_line = f"orjson/{so_name},sha256={so_hash},{so_len}"
+
+    record_original = record_path.read_bytes().decode("utf-8")
+    record_fixed = []
+    replaced = False
+    for line in record_original.split("\n"):
+        print(line)
+        # Compare the whole path field, not just a prefix of it.
+        if line.startswith(f"orjson/{so_name},"):
+            print(f"fixed: {new_line}")
+            record_fixed.append(new_line)
+            replaced = True
+        else:
+            record_fixed.append(line)
+
+    if not replaced:
+        # An untouched RECORD would keep a stale hash and size for the module.
+        print(f"no RECORD entry for orjson/{so_name}", file=sys.stderr)
+        return 1
+
+    record_path.write_bytes("\n".join(record_fixed).encode("utf-8"))
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/script/lint b/script/lint
index 22f5742e..307d33f3 100755
--- a/script/lint
+++ b/script/lint
@@ -2,8 +2,10 @@
 
 set -eou pipefail
 
-to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pymem
-script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init
+to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py
+script/fix-dist-info script/profdata
+script/pydataclass script/pymem script/pysort script/pynumpy
+script/pynonstr script/pycorrectness script/graph integration/init
 integration/wsgi.py integration/typestubs.py integration/thread"
 
 ruff check ${to_lint} --fix
diff --git a/script/profdata b/script/profdata
new file mode 100755
index 00000000..7e9c1f42
--- /dev/null
+++ b/script/profdata
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+"""Workload script/bolt runs on the instrumented build to collect a profile."""
+
+import datetime
+import gc
+import lzma
+import os
+import sys
+
+import pytest
+from faker import Faker
+
+from orjson import (
+    OPT_APPEND_NEWLINE,
+    OPT_INDENT_2,
+    OPT_NAIVE_UTC,
+    OPT_OMIT_MICROSECONDS,
+    dumps,
+    loads,
+)
+
+# Restrict the process to CPUs 0 and 1 and turn off the cyclic garbage collector.
+os.sched_setaffinity(os.getpid(), {0, 1})
+gc.disable()
+
+NUM_LOOPS = 100
+NUM_ENTRIES = 5
+
+FAKER_LOCALES = [
+    "ar_AA",
+    "fi_FI",
+    "fil_PH",
+    "he_IL",
+    "ja_JP",
+    "th_TH",
+    "tr_TR",
+    "uk_UA",
+    "vi_VN",
+]
+
+NOW = datetime.datetime.now()
+
+
+def fake():
+    """Round-trip generated multi-locale records through dumps() and loads()."""
+    # Named `faker` so the local does not shadow this function.
+    faker = Faker(FAKER_LOCALES)
+    profile_keys = tuple(set(faker.profile().keys()) - {"current_location"})
+
+    data = [
+        [
+            {
+                "id": "jzoijxzzoijx",
+                "created_at": NOW,
+                "person": faker.profile(profile_keys),
+                "emoji": faker.emoji(),
+                "😾": "",
+                "a": 12123,
+                "int": 19298012910,
+            }
+            for _ in range(NUM_ENTRIES)
+        ]
+        for _ in range(NUM_LOOPS)
+    ]
+
+    orjson_opt = OPT_NAIVE_UTC | OPT_OMIT_MICROSECONDS
+    for each in data:
+        loads(dumps(each, option=orjson_opt))
+
+
+def _run_fixture(fixture: str, iterations: int):
+    """Parse the xz file `fixture` `iterations` times; dump it twice as often."""
+    with lzma.open(fixture, "r") as fileh:
+        file_bytes = fileh.read()
+
+    for _ in range(iterations):
+        loads(file_bytes)
+
+    file_obj = loads(file_bytes)
+    for _ in range(iterations * 2):
+        dumps(file_obj)
+
+
+# Multiplier applied to every per-fixture iteration count below.
+N = 5
+
+
+def fixture():
+    """Exercise loads() and dumps() on the fixture documents under data/."""
+    _run_fixture("data/github.json.xz", 40000 * N)
+    _run_fixture("data/twitter.json.xz", 10000 * N)
+    _run_fixture("data/citm_catalog.json.xz", 500 * N)
+    _run_fixture("data/canada.json.xz", 50 * N)
+
+
+def run_pytest():
+    """Run the test suite in-process, skipping the four ignored modules."""
+    pytest.main(
+        [
+            "--quiet",
+            "--ignore=test/test_api.py",
+            "--ignore=test/test_fixture.py",
+            "--ignore=test/test_fake.py",
+            "--ignore=test/test_ujson.py",
+            "test",
+        ]
+    )
+
+
+if __name__ == "__main__":
+    run_pytest()
+    fake()
+    fixture()