From a84bb6ecb71b73c01adc88e460d7cef14c1575a6 Mon Sep 17 00:00:00 2001
From: ijl
Date: Tue, 5 Nov 2024 15:15:03 +0000
Subject: [PATCH] bolt -instrument

Post-link optimize the built extension module with llvm-bolt using an
instrumented profile.

- script/bolt instruments the orjson shared object installed in .venv,
  runs script/profdata to collect a profile, re-optimizes the shared
  object unpacked from the wheel with that profile, and repacks the
  wheel in place.
- script/profdata exercises dumps()/loads() through the test suite,
  Faker-generated documents, and the compressed fixtures under data/.
- script/fix-dist-info rewrites the sha256 and size recorded for the
  shared object in the wheel's RECORD after the object was modified.
- CI and script/develop now link with --emit-relocs (the optimizing
  llvm-bolt run passes --relocs).
---
 .github/workflows/artifact.yaml |  12 +++-
 .gitignore                      |   1 +
 script/bolt                     |  77 ++++++++++++++++++++
 script/develop                  |   5 +-
 script/fix-dist-info            |  38 ++++++++++
 script/profdata                 | 123 ++++++++++++++++++++++++++++++++
 6 files changed, 252 insertions(+), 4 deletions(-)
 create mode 100755 script/bolt
 create mode 100755 script/fix-dist-info
 create mode 100755 script/profdata

diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml
index eae5339d..cc13dff7 100644
--- a/.github/workflows/artifact.yaml
+++ b/.github/workflows/artifact.yaml
@@ -78,8 +78,8 @@ jobs:
             cc: "clang",
             cflags: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm",
             features: "avx512,no-panic,unstable-simd,yyjson",
-            ldflags: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow",
-            rustflags: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=4 -D warnings",
+            ldflags: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs",
+            rustflags: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z threads=2 -D warnings",
             tag: null,
             target: "x86_64-unknown-linux-gnu",
           },
@@ -126,6 +126,7 @@ jobs:
           export PATH="/root/.cargo/bin:/home/runner/work/orjson/orjson/.venv:/home/runner/.cargo/bin:$PATH"
 
           ./script/install-fedora
+          dnf install --setopt=install_weak_deps=false -y llvm-bolt zip unzip perf
 
           source "${VENV}/bin/activate"
 
@@ -137,6 +138,13 @@ jobs:
 
           uv pip install ${CARGO_TARGET_DIR}/wheels/orjson*.whl
 
+          pytest -v test
+          ./integration/run thread
+          ./script/bolt
+          auditwheel show $(find target/wheels/*.whl) | grep "\"${COMPATIBILITY}_x86_64\""
+
+          uv pip install ${CARGO_TARGET_DIR}/wheels/orjson*.whl
+
           pytest -v test
           ./integration/run thread
           ./integration/run http
diff --git a/.gitignore b/.gitignore
index fdf3c2de..77922014 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,5 +8,6 @@
 /include/cargo
 /perf.*
 /target
+/tmp
 /vendor
 __pycache__
diff --git a/script/bolt b/script/bolt
new file mode 100755
index 00000000..b214b0c8
--- /dev/null
+++ b/script/bolt
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+rm -f ./perf.fdata
+rm -rf tmp
+
+wheel=$(find "${CARGO_TARGET_DIR}/wheels" -type f -name "orjson*.whl" -exec basename {} \;)
+
+unzip -q -o "${CARGO_TARGET_DIR}/wheels/${wheel}" -d tmp
+sharedob=$(find tmp/orjson -type f -name "orjson*.so" -exec basename {} \;)
+
+installedobj=$(find .venv -type f -name "orjson*.so")
+llvm-bolt "${installedobj}" --instrument --o orjson.so
+mv orjson.so "${installedobj}"
+
+echo "collecting perf data..."
+
+./script/profdata
+mv /tmp/prof.fdata ./perf.fdata
+
+llvm-bolt \
+    "tmp/orjson/${sharedob}" \
+    --o "${sharedob}" \
+    --data=perf.fdata \
+    --strict \
+    --use-gnu-stack \
+    --assume-abi \
+    --sequential-disassembly \
+    --update-debug-sections \
+    --use-old-text \
+    --relocs \
+    --hot-data \
+    --cg-from-perf-data \
+    --cg-use-split-hot-size \
+    --frame-opt=all \
+    --frame-opt-rm-stores \
+    --inline-ap \
+    --inline-small-functions \
+    --inline-memcpy \
+    --eliminate-unreachable \
+    --peepholes=all \
+    --plt=all \
+    --icf \
+    --icp-eliminate-loads \
+    --indirect-call-promotion=all \
+    --jump-tables=aggressive \
+    --preserve-blocks-alignment \
+    --reg-reassign \
+    --sctc-mode=always \
+    --shorten-instructions \
+    --simplify-conditional-tail-calls \
+    --simplify-rodata-loads \
+    --split-all-cold \
+    --reorder-blocks=cache+ \
+    --reorder-functions=cdsort \
+    --reorder-functions-use-hot-size \
+    --strip-rep-ret \
+    --use-aggr-reg-reassign \
+    --use-compact-aligner \
+    --use-edge-counts \
+    --x86-align-branch-boundary-hot-only \
+    --x86-strip-redundant-address-size \
+    --dyno-stats
+
+if [[ "$*" == *"strip"* ]]; then
+    strip --strip-debug "${sharedob}"
+fi
+
+mv -f "${sharedob}" tmp/orjson
+
+./script/fix-dist-info "$(find tmp -type f -name "RECORD")" "$(find tmp -type f -name "orjson*.so")"
+
+cd tmp
+zip -9 -r "${wheel}" -xi *
+cd ..
+mv -f "tmp/${wheel}" "${CARGO_TARGET_DIR}/wheels"
diff --git a/script/develop b/script/develop
index b92a22e9..8e42e226 100755
--- a/script/develop
+++ b/script/develop
@@ -13,8 +13,9 @@ export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}"
 echo "CC: ${CC}, LD: ${LD}, LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
 
 export CFLAGS="-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm"
-export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow"
-export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=8"
+
+export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs"
+export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z threads=8"
 
 rm -f ${CARGO_TARGET_DIR}/wheels/*.whl
 
diff --git a/script/fix-dist-info b/script/fix-dist-info
new file mode 100755
index 00000000..aa228d49
--- /dev/null
+++ b/script/fix-dist-info
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+# usage:
+# ./script/fix-dist-info $(find tmp -type f -name "RECORD") $(find tmp -type f -name "orjson*.so")
+
+import base64
+import hashlib
+import pathlib
+import sys
+
+record_path = pathlib.Path(sys.argv[1])
+so_path = pathlib.Path(sys.argv[2])
+so_name = so_path.name
+
+print(f"Fixing hash of {so_path} in {record_path} ...")
+
+so_bytes = so_path.read_bytes()
+so_len = len(so_bytes)
+
+# URL_SAFE_NO_PAD.encode(Sha256::digest(bytes));
+so_hash = (
+    base64.urlsafe_b64encode(hashlib.sha256(so_bytes).digest())
+    .rstrip(b"=")
+    .decode("utf-8")
+)
+
+record_original = record_path.read_bytes().decode("utf-8")
+record_fixed = []
+for line in record_original.split("\n"):
+    print(line)
+    if line.startswith(f"orjson/{so_name}"):
+        new_line = f"orjson/{so_name},sha256={so_hash},{so_len}"
+        print(f"fixed: {new_line}")
+        record_fixed.append(new_line)
+    else:
+        record_fixed.append(line)
+
+record_path.write_bytes("\n".join(record_fixed).encode("utf-8"))
diff --git a/script/profdata b/script/profdata
new file mode 100755
index 00000000..9efd2f48
--- /dev/null
+++ b/script/profdata
@@ -0,0 +1,123 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+import datetime
+import gc
+import lzma
+import os
+import sys
+
+import pytest
+from faker import Faker
+
+from orjson import (
+    OPT_APPEND_NEWLINE,
+    OPT_INDENT_2,
+    OPT_NAIVE_UTC,
+    OPT_OMIT_MICROSECONDS,
+    dumps,
+    loads,
+)
+
+os.sched_setaffinity(os.getpid(), {0, 1})
+gc.disable()
+
+
+N = 200
+
+NUM_LOOPS = 20
+NUM_ENTRIES = 5
+
+FAKER_LOCALES = [
+    "ar_AA",
+    "fi_FI",
+    "fil_PH",
+    "he_IL",
+    "ja_JP",
+    "th_TH",
+    "tr_TR",
+    "uk_UA",
+    "vi_VN",
+]
+
+NOW = datetime.datetime.now()
+
+
+class Default:
+
+    def __str__(self):
+        return "aaa"
+
+def default(obj, /):
+    return str(obj)
+
+
+def fake():
+    print("fake ...")
+    fake = Faker(FAKER_LOCALES)
+    profile_keys = tuple(set(fake.profile().keys()) - {"current_location"})
+
+    DATA = [
+        [
+            {
+                "id": "jzoijxzzoijx",
+                "created_at": NOW,
+                "person": fake.profile(profile_keys),
+                "emoji": fake.emoji(),
+                "😾": "",
+                "z": Default(),
+                "a": 12123,
+                "int": 19298012910,
+            }
+            for _ in range(0, NUM_ENTRIES)
+        ]
+        for _ in range(0, NUM_LOOPS)
+    ]
+
+    orjson_opt = OPT_NAIVE_UTC | OPT_OMIT_MICROSECONDS
+    for _ in range(N):
+        for each in DATA:
+            loads(dumps(each, default=default, option=orjson_opt))
+
+
+def _run_fixture(fixture: str, iterations: int):
+    with lzma.open(fixture, "rb") as fileh:
+        file_bytes = fileh.read()
+
+    file_str = file_bytes.decode("utf-8")
+
+    for _ in range(iterations):
+        dumps(loads(file_bytes))
+
+    for _ in range(iterations):
+        dumps(loads(file_str))
+
+
+
+def fixture():
+    print("fixture ...")
+    _run_fixture("data/github.json.xz", 500 * N)
+    _run_fixture("data/twitter.json.xz", 100 * N)
+    _run_fixture("data/citm_catalog.json.xz", 20 * N)
+    _run_fixture("data/canada.json.xz", 2 * N)
+
+
+def run_pytest():
+    print("pytest ...")
+    pytest.main(
+        [
+            "--quiet",
+            "--ignore=test/test_api.py",
+            "--ignore=test/test_fake.py",
+            "--ignore=test/test_fixture.py",
+            "--ignore=test/test_memory.py",
+            "--ignore=test/test_ujson.py",
+            "test",
+        ]
+    )
+
+
+if __name__ == "__main__":
+    run_pytest()
+    fake()
+    fixture()