Skip to content

Commit

Permalink
bolt -instrument
Browse files Browse the repository at this point in the history
  • Loading branch information
ijl committed Jul 2, 2024
1 parent 60fa75b commit 2a52765
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 8 deletions.
16 changes: 12 additions & 4 deletions .github/workflows/artifact.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,20 @@ jobs:
env:
CC: "clang"
CFLAGS: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm"
LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow"
RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=2 -D warnings"
LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs"
RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=4 -D warnings"
PATH: "/__w/orjson/orjson/.venv/bin:/github/home/.cargo/bin:/root/.local/bin:/root/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
container:
image: fedora:41
options: --cap-add SYS_ADMIN
steps:

- name: cpuinfo
run: cat /proc/cpuinfo

- name: Build environment pre-clone
run: |
dnf install -y rustup clang lld python${{ matrix.python.version }} git
dnf install -y rustup clang lld python${{ matrix.python.version }} git llvm-bolt zip unzip perf
rustup-init --default-toolchain "${RUST_TOOLCHAIN}-x86_64-unknown-linux-gnu" --profile minimal --component rust-src -y
- uses: actions/checkout@v4
Expand Down Expand Up @@ -113,6 +114,13 @@ jobs:
env:
PYTHONMALLOC: "debug"

- name: bolt
run: |
source .venv/bin/activate
./script/bolt
auditwheel show $(find target/wheels/*.whl) | grep '"manylinux_2_17_x86_64"'
uv pip install target/wheels/orjson*.whl
- run: source .venv/bin/activate && ./integration/run thread
- run: source .venv/bin/activate && ./integration/run http
- run: source .venv/bin/activate && ./integration/run init
Expand All @@ -124,7 +132,7 @@ jobs:
if: matrix.python.version != '3.8'
- name: Store wheels
if: "startsWith(github.ref, 'refs/tags/')"
# if: "startsWith(github.ref, 'refs/tags/')"
uses: actions/upload-artifact@v4
with:
name: orjson_manylinux_2_17_amd64_${{ matrix.python.version }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
/include/cargo
/perf.*
/target
/tmp
/vendor
__pycache__
77 changes: 77 additions & 0 deletions script/bolt
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env bash
# Post-process the built wheel with LLVM BOLT:
#   1. instrument the *installed* shared object and run the profiling
#      workload (./script/profdata) against it to collect perf data,
#   2. apply BOLT optimizations to the pristine copy inside the wheel,
#   3. fix the wheel's RECORD hash/size and repack it.
#
# usage: ./script/bolt [strip]
#   strip  also remove debug sections from the optimized shared object
#
# Requires llvm-bolt, zip/unzip, a wheel in ${CARGO_TARGET_DIR}/wheels,
# and the package installed into .venv (that copy is the one profiled).

set -eou pipefail

rm -f ./perf.fdata
rm -rf tmp

# Basename of the built wheel, e.g. orjson-<ver>-cp312-....whl
wheel=$(find "${CARGO_TARGET_DIR}/wheels" -type f -name "orjson*.whl" -exec basename {} \;)

# Unpack the wheel so its shared object can be swapped after optimization.
unzip -q -o "${CARGO_TARGET_DIR}/wheels/${wheel}" -d tmp
sharedob=$(find tmp/orjson -type f -name "orjson*.so" -exec basename {} \;)

# Instrument the installed copy in-place; importing/running it now writes
# a BOLT profile (to /tmp/prof.fdata, per the instrumentation default).
installedobj=$(find .venv -type f -name "orjson*.so")
llvm-bolt "${installedobj}" --instrument --o orjson.so --jump-tables=none --align-macro-fusion=none
mv orjson.so "${installedobj}"

echo "collecting perf data..."

./script/profdata
mv /tmp/prof.fdata ./perf.fdata

# Optimize the untouched copy from the wheel using the collected profile.
# (fix: --use-gnu-stack was previously passed twice)
llvm-bolt \
    "tmp/orjson/${sharedob}" \
    --o "${sharedob}" \
    --data=perf.fdata \
    --strict \
    --use-gnu-stack \
    --assume-abi \
    --deterministic-debuginfo \
    --sequential-disassembly \
    --update-debug-sections \
    --use-old-text \
    --relocs \
    --hot-data \
    --cg-from-perf-data \
    --cg-ignore-recursive-calls \
    --inline-ap \
    --inline-small-functions \
    --inline-memcpy \
    --eliminate-unreachable \
    --plt=all \
    --peepholes=all \
    --indirect-call-promotion=all \
    --icf \
    --icp-eliminate-loads \
    --jump-tables=move \
    --reg-reassign \
    --use-aggr-reg-reassign \
    --align-macro-fusion=all \
    --frame-opt=all \
    --frame-opt-rm-stores \
    --sctc-mode=always \
    --simplify-rodata-loads \
    --simplify-conditional-tail-calls \
    --split-all-cold \
    --reorder-data-inplace \
    --reorder-blocks=ext-tsp \
    --reorder-functions=hfsort+ \
    --reorder-functions-use-hot-size \
    --strip-rep-ret \
    --x86-align-branch-boundary-hot-only \
    --x86-strip-redundant-address-size \
    --dyno-stats

# fix: "$*" (single joined word) is the correct form for a substring
# match over all arguments; "$@" inside [[ ]] only happened to work.
if [[ "$*" == *"strip"* ]]; then
    strip --strip-debug "${sharedob}"
fi

mv -f "${sharedob}" tmp/orjson

# RECORD still holds the hash/size of the pre-BOLT shared object; fix it.
./script/fix-dist-info "$(find tmp -type f -name "RECORD")" "$(find tmp -type f -name "orjson*.so")"

cd tmp
zip -9 -r "${wheel}" -xi *
cd ..
mv -f "tmp/${wheel}" "${CARGO_TARGET_DIR}/wheels"
4 changes: 2 additions & 2 deletions script/develop
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}"
echo "CC: ${CC}, LD: ${LD}, LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"

export CFLAGS="-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm"
export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow"
export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"
export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow -Wl,--emit-relocs"
export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -C link-arg=-Wl,--emit-relocs -Z mir-opt-level=4 -Z virtual-function-elimination -Z threads=8"

maturin build --target="${TARGET}" "$@"

Expand Down
38 changes: 38 additions & 0 deletions script/fix-dist-info
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
# usage:
# ./script/fix-dist-info $(find tmp -type f -name "RECORD") $(find tmp -type f -name "orjson*.so")

import base64
import hashlib
import pathlib
import sys

record_path = pathlib.Path(sys.argv[1])
so_path = pathlib.Path(sys.argv[2])
so_name = sys.argv[2].split("/")[-1]

print(f"Fixing hash of {so_path} in {record_path} ...")

so_bytes = so_path.read_bytes()
so_len = len(so_bytes)

# URL_SAFE_NO_PAD.encode(Sha256::digest(bytes));
so_hash = (
base64.urlsafe_b64encode(hashlib.sha256(so_bytes).digest())
.rstrip(b"=")
.decode("utf-8")
)

record_original = record_path.read_bytes().decode("utf-8")
record_fixed = []
for line in record_original.split("\n"):
print(line)
if line.startswith(f"orjson/{so_name}"):
new_line = f"orjson/{so_name},sha256={so_hash},{so_len}"
print(f"fixed: {new_line}")
record_fixed.append(new_line)
else:
record_fixed.append(line)

record_path.write_bytes("\n".join(record_fixed).encode("utf-8"))
6 changes: 4 additions & 2 deletions script/lint
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

set -eou pipefail

to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pymem
script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init
to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py
script/fix-dist-info script/profdata
script/pydataclass script/pymem script/pysort script/pynumpy
script/pynonstr script/pycorrectness script/graph integration/init
integration/wsgi.py integration/typestubs.py integration/thread"

ruff check ${to_lint} --fix
Expand Down
106 changes: 106 additions & 0 deletions script/profdata
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: (Apache-2.0 OR MIT)
# Profiling workload driver: exercises orjson loads()/dumps() across the
# test suite, randomized Faker data, and canned JSON fixtures so that a
# profiler (e.g. BOLT instrumentation) observes representative code paths.

import datetime
import gc
import lzma
import os
import sys

import pytest
from faker import Faker

# NOTE(review): OPT_APPEND_NEWLINE, OPT_INDENT_2, and the `sys` import
# appear unused in this file -- confirm whether they can be dropped.
from orjson import (
    OPT_APPEND_NEWLINE,
    OPT_INDENT_2,
    OPT_NAIVE_UTC,
    OPT_OMIT_MICROSECONDS,
    dumps,
    loads,
)

# Pin the process to CPUs 0-1 and disable the GC for a steadier profile.
# NOTE(review): sched_setaffinity raises OSError if those CPUs are
# unavailable -- acceptable for the Linux CI container this targets.
os.sched_setaffinity(os.getpid(), {0, 1})
gc.disable()

NUM_LOOPS = 100  # batches of synthetic data generated by fake()
NUM_ENTRIES = 5  # entries per batch

# Locales heavy in non-ASCII text, to exercise str/UTF-8 handling.
FAKER_LOCALES = [
    "ar_AA",
    "fi_FI",
    "fil_PH",
    "he_IL",
    "ja_JP",
    "th_TH",
    "tr_TR",
    "uk_UA",
    "vi_VN",
]

# Single timestamp reused in every synthetic entry.
NOW = datetime.datetime.now()


def fake():
    """Round-trip randomized, multi-locale profile data through
    dumps()/loads() so the profiler sees str, datetime, and int paths."""
    factory = Faker(FAKER_LOCALES)  # renamed: no longer shadows fake()
    # all profile fields except "current_location"
    profile_keys = tuple(set(factory.profile().keys()) - {"current_location"})

    batches = []
    for _ in range(0, NUM_LOOPS):
        entries = []
        for _ in range(0, NUM_ENTRIES):
            entries.append(
                {
                    "id": "jzoijxzzoijx",
                    "created_at": NOW,
                    "person": factory.profile(profile_keys),
                    "emoji": factory.emoji(),
                    "😾": "",
                    "a": 12123,
                    "int": 19298012910,
                }
            )
        batches.append(entries)

    opts = OPT_NAIVE_UTC | OPT_OMIT_MICROSECONDS
    for batch in batches:
        loads(dumps(batch, option=opts))


def _run_fixture(fixture: str, iterations: int):
    """Decompress one .xz JSON fixture, then hammer loads() on the raw
    bytes and dumps() (weighted 2x) on the parsed object."""
    with lzma.open(fixture, "r") as handle:
        raw = handle.read()

    # deserialization workload
    for _ in range(iterations):
        loads(raw)

    # serialization workload
    parsed = loads(raw)
    for _ in range(iterations * 2):
        dumps(parsed)


# Global scale factor for all fixture iteration counts.
N = 5


def fixture():
    """Run the canned JSON fixtures, weighted roughly by document size."""
    workloads = (
        ("data/github.json.xz", 40000),
        ("data/twitter.json.xz", 10000),
        ("data/citm_catalog.json.xz", 500),
        ("data/canada.json.xz", 50),
    )
    for path, iterations in workloads:
        _run_fixture(path, iterations * N)


def run_pytest():
    """Run the test suite under pytest to cover diverse code paths,
    skipping the slow fixture/fuzz-style test modules."""
    skipped = (
        "test/test_api.py",
        "test/test_fixture.py",
        "test/test_fake.py",
        "test/test_ujson.py",
    )
    args = ["--quiet"]
    args.extend(f"--ignore={name}" for name in skipped)
    args.append("test")
    pytest.main(args)


if __name__ == "__main__":
    # Workload order: test suite first, then synthetic Faker data,
    # then the canned fixtures (the longest-running phase).
    run_pytest()
    fake()
    fixture()

0 comments on commit 2a52765

Please sign in to comment.