From 93c36b5300bab05b7a78d8a9137925b03dfd2852 Mon Sep 17 00:00:00 2001
From: ijl
Date: Wed, 24 Apr 2024 14:48:32 +0000
Subject: [PATCH] bolt -instrument

---

Notes:
    Adds an llvm-bolt instrument/optimize pass over the built orjson shared
    object.

    - artifact.yaml: CFLAGS gains -fno-plt; LDFLAGS and RUSTFLAGS gain
      --emit-relocs; the container gets --cap-add SYS_ADMIN and the
      llvm-bolt, zip, unzip and perf packages; a new "bolt" step runs
      ./script/bolt and reinstalls the wheel ahead of the pytest step.
    - script/bolt: instruments the orjson .so found under .venv with
      `llvm-bolt -instrument`, runs ./script/profdata, moves
      /tmp/prof.fdata to ./perf.fdata, runs llvm-bolt with -data=perf.fdata
      on the .so unpacked from the wheel, then re-zips the wheel into
      target/wheels.
    - script/profdata: the workload. Runs pytest over test/ (four modules
      ignored), a dumps/loads round trip of Faker-generated documents, and
      loads/dumps loops over the data/*.json.xz fixtures.
    - script/develop gets the same --emit-relocs flags; script/lint adds
      script/profdata; script/graph changes the clamp values to 12 and 6;
      .gitignore adds /tmp.

    NOTE(review): the "Store wheels" `if:` tag condition is commented out
    rather than removed -- presumably temporary; confirm before merging.
    NOTE(review): script/bolt uses the output of
    `find target/wheels ... -name "orjson*.whl"` as one file name --
    presumably exactly one wheel is expected there; verify.
    NOTE(review): script/profdata imports sys, OPT_APPEND_NEWLINE and
    OPT_INDENT_2 but never references them.
    NOTE(review): indentation and whitespace-only context lines in the
    hunks below were re-derived from the hunk headers' line counts; check
    them against the target tree before applying.

 .github/workflows/artifact.yaml |  17 +++--
 .gitignore                      |   1 +
 script/bolt                     |  67 ++++++++++++++++++++
 script/develop                  |   4 +-
 script/graph                    |   2 +-
 script/lint                     |   2 +-
 script/profdata                 | 106 ++++++++++++++++++++++++++++++++
 7 files changed, 189 insertions(+), 10 deletions(-)
 create mode 100755 script/bolt
 create mode 100755 script/profdata

diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml
index 9b0e9b08..25408fab 100644
--- a/.github/workflows/artifact.yaml
+++ b/.github/workflows/artifact.yaml
@@ -70,20 +70,20 @@ jobs:
         ]
     env:
       CC: "clang"
-      CFLAGS: "-Os -fstrict-aliasing -flto=full"
-      LDFLAGS: "-fuse-ld=lld -Wl,--as-needed"
-      RUSTFLAGS: "-C linker=clang -C lto=fat -C link-arg=-fuse-ld=lld -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=4 -D warnings"
+      CFLAGS: "-Os -fstrict-aliasing -fno-plt -flto=full"
+      LDFLAGS: "-fuse-ld=lld -Wl,--as-needed -Wl,--emit-relocs"
+      RUSTFLAGS: "-C linker=clang -C lto=fat -C link-arg=-fuse-ld=lld -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=4 -D warnings"
       PATH: "/__w/orjson/orjson/.venv/bin:/github/home/.cargo/bin:/root/.local/bin:/root/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
     container:
       image: fedora:41
-      options: --user 0
+      options: --user 0 --cap-add SYS_ADMIN
     steps:
 
     - uses: actions/checkout@v4
 
     - name: Build environment
       run: |
-        dnf install -y rustup clang lld python${{ matrix.python.version }}
+        dnf install -y rustup clang lld python${{ matrix.python.version }} llvm-bolt zip unzip perf
 
         rustup-init --default-toolchain "${RUST_TOOLCHAIN}-x86_64-unknown-linux-gnu" --profile minimal --component rust-src -y
         cargo fetch --target=x86_64-unknown-linux-gnu &
@@ -104,6 +104,11 @@ jobs:
           --target=x86_64-unknown-linux-gnu
         uv pip install target/wheels/orjson*.whl
 
+    - name: bolt
+      run: |
+        ./script/bolt
+        uv pip install target/wheels/orjson*.whl
+
     - run: pytest -s -rxX -v -n 4 test
       env:
         PYTHONMALLOC: "debug"
@@ -113,7 +118,7 @@ jobs:
     - run: ./integration/run init
 
     - name: Store wheels
-      if: "startsWith(github.ref, 'refs/tags/')"
+      # if: "startsWith(github.ref, 'refs/tags/')"
       uses: actions/upload-artifact@v4
       with:
         name: orjson_manylinux_2_17_amd64_${{ matrix.python.version }}
diff --git a/.gitignore b/.gitignore
index fdf3c2de..77922014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,6 @@
 /include/cargo
 /perf.*
 /target
+/tmp
 /vendor
 __pycache__
diff --git a/script/bolt b/script/bolt
new file mode 100755
index 00000000..75ca9ec0
--- /dev/null
+++ b/script/bolt
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+set -eou pipefail
+
+rm -f ./perf.fdata
+rm -rf tmp
+
+wheel=$(find target/wheels -type f -name "orjson*.whl" -exec basename {} \;)
+
+unzip -q -o "target/wheels/${wheel}" -d tmp
+sharedob=$(find tmp/orjson -type f -name "orjson*.so" -exec basename {} \;)
+
+installedobj=$(find .venv -type f -name "orjson*.so")
+llvm-bolt "${installedobj}" -instrument -o orjson.so
+mv orjson.so "${installedobj}"
+
+echo "collecting perf data..."
+
+./script/profdata
+mv /tmp/prof.fdata ./perf.fdata
+
+llvm-bolt \
+    "tmp/orjson/${sharedob}" \
+    -o "${sharedob}" \
+    -data=perf.fdata \
+    -strict \
+    -deterministic-debuginfo \
+    -sequential-disassembly \
+    -update-debug-sections \
+    -use-old-text \
+    -relocs \
+    -hot-data \
+    -inline-ap \
+    -cg-from-perf-data \
+    -inline-small-functions \
+    -inline-memcpy \
+    -eliminate-unreachable \
+    -plt=all \
+    -peepholes=all \
+    -indirect-call-promotion=all \
+    -icp-eliminate-loads \
+    -jump-tables=move \
+    -reg-reassign \
+    -use-aggr-reg-reassign \
+    -align-macro-fusion=hot \
+    -frame-opt=all \
+    -frame-opt-rm-stores \
+    -split-functions \
+    -split-strategy=profile2 \
+    -split-all-cold \
+    -simplify-rodata-loads \
+    -simplify-conditional-tail-calls \
+    -sctc-mode=always \
+    -reorder-data-inplace \
+    -reorder-blocks=ext-tsp \
+    -reorder-functions=cdsort \
+    -strip-rep-ret \
+    -x86-align-branch-boundary-hot-only \
+    -x86-strip-redundant-address-size \
+    -dyno-stats
+
+mv -f "${sharedob}" tmp/orjson
+
+cd tmp
+zip -9 -r "${wheel}" -xi *
+cd ..
+mv -f "tmp/${wheel}" target/wheels
diff --git a/script/develop b/script/develop
index a4207166..4b5f9166 100755
--- a/script/develop
+++ b/script/develop
@@ -10,8 +10,8 @@ export LD="${LD:-lld}"
 echo "CC: ${CC}, LD: ${LD}, LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
 
 export CFLAGS="-Os -fstrict-aliasing -fno-plt -flto=full"
-export LDFLAGS="-fuse-ld=${LD} -Wl,--as-needed"
-export RUSTFLAGS="-C linker=${CC} -C lto=fat -C link-arg=-fuse-ld=${LD} -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"
+export LDFLAGS="-fuse-ld=${LD} -Wl,--as-needed -Wl,--emit-relocs"
+export RUSTFLAGS="-C linker=${CC} -C lto=fat -C link-arg=-fuse-ld=${LD} -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"
 
 maturin build "$@"
 
diff --git a/script/graph b/script/graph
index 72bcdd95..014ab95f 100755
--- a/script/graph
+++ b/script/graph
@@ -92,7 +92,7 @@ def tab(obj):
         if not per_op_data:
             continue
 
-        clamp = 10 if operation == "serialization" else 5
+        clamp = 12 if operation == "serialization" else 6
 
         json_baseline = {}
         for each in per_op_data:
diff --git a/script/lint b/script/lint
index 22f5742e..4f9f86fc 100755
--- a/script/lint
+++ b/script/lint
@@ -2,7 +2,7 @@
 
 set -eou pipefail
 
-to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pymem
+to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/profdata script/pydataclass script/pymem
 script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init
 integration/wsgi.py integration/typestubs.py integration/thread"
 
diff --git a/script/profdata b/script/profdata
new file mode 100755
index 00000000..d94846cc
--- /dev/null
+++ b/script/profdata
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import datetime
+import gc
+import lzma
+import os
+import sys
+
+import pytest
+from faker import Faker
+
+from orjson import (
+    OPT_APPEND_NEWLINE,
+    OPT_INDENT_2,
+    OPT_NAIVE_UTC,
+    OPT_OMIT_MICROSECONDS,
+    dumps,
+    loads,
+)
+
+os.sched_setaffinity(os.getpid(), {0, 1})
+gc.disable()
+
+NUM_LOOPS = 100
+NUM_ENTRIES = 5
+
+FAKER_LOCALES = [
+    "ar_AA",
+    "fi_FI",
+    "fil_PH",
+    "he_IL",
+    "ja_JP",
+    "th_TH",
+    "tr_TR",
+    "uk_UA",
+    "vi_VN",
+]
+
+NOW = datetime.datetime.now()
+
+
+def fake():
+    fake = Faker(FAKER_LOCALES)
+    profile_keys = tuple(set(fake.profile().keys()) - {"current_location"})
+
+    DATA = [
+        [
+            {
+                "id": "jzoijxzzoijx",
+                "created_at": NOW,
+                "person": fake.profile(profile_keys),
+                "emoji": fake.emoji(),
+                "😾": "",
+                "a": 0,
+                "int": 19298012910,
+            }
+            for _ in range(0, NUM_ENTRIES)
+        ]
+        for _ in range(0, NUM_LOOPS)
+    ]
+
+    orjson_opt = OPT_NAIVE_UTC | OPT_OMIT_MICROSECONDS
+    for each in DATA:
+        loads(dumps(each, option=orjson_opt))
+
+
+def _run_fixture(fixture: str, iterations: int):
+    with lzma.open(fixture, "r") as fileh:
+        file_bytes = fileh.read()
+
+    for _ in range(iterations):
+        loads(file_bytes)
+
+    file_obj = loads(file_bytes)
+    for _ in range(iterations * 2):
+        dumps(file_obj)
+
+
+N = 1
+
+
+def fixture():
+    _run_fixture("data/github.json.xz", 20000 * N)
+    _run_fixture("data/twitter.json.xz", 10000 * N)
+    _run_fixture("data/citm_catalog.json.xz", 500 * N)
+    _run_fixture("data/canada.json.xz", 50 * N)
+
+
+def run_pytest():
+    pytest.main(
+        [
+            "--quiet",
+            "--ignore=test/test_api.py",
+            "--ignore=test/test_fixture.py",
+            "--ignore=test/test_fake.py",
+            "--ignore=test/test_ujson.py",
+            "test",
+        ]
+    )
+
+
+if __name__ == "__main__":
+    run_pytest()
+    fake()
+    fixture()