From 2821aa08caa81aaf7a22aa8885f9f19ec696244c Mon Sep 17 00:00:00 2001 From: Sylvain Bellemare Date: Fri, 15 May 2020 16:31:25 -0500 Subject: [PATCH] Run benchmark tests "logic only" on Travis CI The benchmark tests are run only once without reports. --- .ci/run-benchmark-tests-logic-only.sh | 15 ++++++ .gitignore | 3 ++ .travis.yml | 5 ++ benchmark/README.md | 61 ++++++++++++++++++++++- benchmark/test_benchmark_hbavss.py | 10 ++-- benchmark/test_benchmark_jubjub.py | 15 ++++-- benchmark/test_benchmark_mimc.py | 7 ++- benchmark/test_benchmark_polynomial.py | 22 ++++++-- benchmark/test_benchmark_preprocessing.py | 11 ++-- benchmark/test_benchmark_rbc.py | 6 +-- benchmark/test_benchmark_reed_solomon.py | 12 +++-- benchmark/test_benchmark_refinement.py | 4 +- pytest.ini | 5 +- tests/fixtures.py | 2 +- 14 files changed, 148 insertions(+), 30 deletions(-) create mode 100755 .ci/run-benchmark-tests-logic-only.sh diff --git a/.ci/run-benchmark-tests-logic-only.sh b/.ci/run-benchmark-tests-logic-only.sh new file mode 100755 index 00000000..2df542e2 --- /dev/null +++ b/.ci/run-benchmark-tests-logic-only.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -ev + +if [ -z $1 ]; then + tag=$IMAGE_TAG +else + tag=$1 +fi + +docker run -it $tag \ + pytest --verbose \ + --benchmark-disable \ + -m skip_bench \ + benchmark/ diff --git a/.gitignore b/.gitignore index 286ac8fc..49883257 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,6 @@ sharedata/ .ci/deploy_key .ci/deploy_key.pub + +# benchmark +.benchmarks diff --git a/.travis.yml b/.travis.yml index 01b08d89..d08e7049 100644 --- a/.travis.yml +++ b/.travis.yml @@ -54,6 +54,11 @@ jobs: install: .ci/pull-or-build-image.sh script: .ci/run-tests.sh after_success: .ci/upload-coverage-report.sh + - name: Benchmark Tests - logic only + env: + - SETUP_EXTRAS: "tests" + install: .ci/pull-or-build-image.sh + script: .ci/run-benchmark-tests-logic-only.sh - if: | type = pull_request OR \ repo != initc3/HoneyBadgerMPC OR \ diff --git a/benchmark/README.md b/benchmark/README.md index 72b56a22..d831c837 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -3,4 +3,63 @@ * Add a file in this folder with the prefix `test_benchmark` in the name. * Follow other benchmarks already written on how to write benchmark code. You can also refer to the [official documentation](https://pytest-benchmark.readthedocs.io/en/latest/). * To run all benchmarks: `pytest -v benchmark` -* To run a single benchmark: `pytest -v benchmark -k ` \ No newline at end of file +* To run a single benchmark: `pytest -v benchmark -k ` + +## Disabling Benchmark Tests +It's possible to test only the logic of the benchmark tests with the +[`--benchmark-disable`](https://pytest-benchmark.readthedocs.io/en/latest/usage.html#commandline-options) option. + +> Benchmarked functions are only ran once and no stats are reported. +> Use this if you want to run the test but don't do any benchmarking. + +Example: + +```shell +$ pytest -v --benchmark-disable benchmark/ +``` + +## Reducing Parametrization +Many tests are parametrized such that the same logic will be executed +multiple times, for different parameter values. In order to allow for +only testing one set of parameter values, meaning running a particular +test only once, as opposed to running it multiple times with different +parameter values, the custom "boolean" `pytest` marker, `skip_bench` is +available. 
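+
+Each benchmark module marks one representative parameter set per test with
+`skip_bench`, using `pytest.param(..., marks=mark.skip_bench)` (or a plain
+`@mark.skip_bench` decorator for unparametrized tests). A minimal sketch of
+the pattern (the test body and parameter values here are invented for
+illustration, not taken from this repository):
+
+```python
+from pytest import mark, param
+
+
+# Only the n=4 case carries the skip_bench marker, so it is the only case
+# collected when selecting tests with `-m skip_bench`.
+@mark.parametrize("n", [param(4, marks=mark.skip_bench), 64, 256])
+def test_benchmark_sum(benchmark, n):
+    benchmark(sum, range(n))
+```
+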
To set `skip_bench` to `True` use the option +`-m skip_bench`: + +```shell +$ pytest -v -m skip_bench benchmark/ +``` + +For instance, without `-m skip_bench`: + +```shell +$ pytest -v benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft + +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[1] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[3] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[5] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[10] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[25] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[33] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[50] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[100] +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[256] +``` + +and with `-m skip_bench`: + +```shell +pytest -v -m skip_bench benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft + +benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[1] +``` + +## Logic Only Benchmark Tests Execution +To only check whether the benchmark tests actually run properly, +without benchmarking them and without testing for many sets of +paramter values, use both `--benchmark-disable` and `-m skip_bench`: + +```shell +$ pytest -v --benchmark-disable -m skip_bench benchmark/ +``` diff --git a/benchmark/test_benchmark_hbavss.py b/benchmark/test_benchmark_hbavss.py index b609692f..c5a3cd6f 100644 --- a/benchmark/test_benchmark_hbavss.py +++ b/benchmark/test_benchmark_hbavss.py @@ -2,7 +2,7 @@ from contextlib import ExitStack from random import randint -from pytest import mark +from pytest import mark, param from honeybadgermpc.betterpairing import G1, ZR from honeybadgermpc.elliptic_curve import Subgroup @@ -24,7 +24,7 @@ def get_avss_params(n, t): @mark.parametrize( "t, k", [ - (1, 5), + param(1, 5, marks=mark.skip_bench), (3, 5), (5, 5), (16, 5), @@ -70,7 +70,7 @@ def _prog(): @mark.parametrize( "t, k", [ - (1, 5), + param(1, 5, marks=mark.skip_bench), (3, 5), (5, 5), (16, 5), @@ -113,7 +113,7 @@ def _prog(): @mark.parametrize( "t, k", [ - (1, 5), + param(1, 5, marks=mark.skip_bench), (3, 5), (5, 5), (16, 5), @@ -160,7 +160,7 @@ def _prog(): @mark.parametrize( "t, k", [ - (1, 5), + param(1, 5, marks=mark.skip_bench), (3, 5), (5, 5), (16, 5), diff --git a/benchmark/test_benchmark_jubjub.py b/benchmark/test_benchmark_jubjub.py index 7654d6ee..91ed0096 100644 --- a/benchmark/test_benchmark_jubjub.py +++ b/benchmark/test_benchmark_jubjub.py @@ -1,4 +1,4 @@ -from pytest import mark +from pytest import mark, param from honeybadgermpc.elliptic_curve import Jubjub, Point from honeybadgermpc.progs.jubjub import SharedPoint, share_mul @@ -32,7 +32,11 @@ TEST_CURVE, ) # noqa: E501 -ALL_BIT_NUMBERS = [int(f"0b{'1' * i}", 2) for i in [1, 64, 128]] + +ALL_BIT_NUMBERS = [ + param(int(f"0b{'1' * i}", 2), marks=(mark.skip_bench if i == 1 else [])) + for i in [1, 64, 128] +] n, t = 4, 1 k = 50000 @@ -45,6 +49,7 @@ def run_benchmark( runner(prog, n, t, preprocessing, k, mixins) +@mark.skip_bench def test_benchmark_shared_point_add(benchmark_runner): async def _prog(context): result = SharedPoint.from_point(context, TEST_POINT) @@ -54,6 +59,7 @@ async def _prog(context): run_benchmark(benchmark_runner, _prog) +@mark.skip_bench def 
test_benchmark_shared_point_double(benchmark_runner): async def _prog(context): result = SharedPoint.from_point(context, TEST_POINT) @@ -87,7 +93,10 @@ async def _prog(context): run_benchmark(benchmark_runner, _prog) -@mark.parametrize("bit_length", list(range(64, 257, 64))) +@mark.parametrize( + "bit_length", + [param(i, marks=mark.skip_bench) if i == 64 else i for i in range(64, 257, 64)], +) def test_benchmark_share_mul(bit_length, benchmark_runner): p = TEST_POINT diff --git a/benchmark/test_benchmark_mimc.py b/benchmark/test_benchmark_mimc.py index 2010e7ec..3a276158 100644 --- a/benchmark/test_benchmark_mimc.py +++ b/benchmark/test_benchmark_mimc.py @@ -1,6 +1,6 @@ from random import randint -from pytest import mark +from pytest import mark, param from honeybadgermpc.elliptic_curve import Jubjub from honeybadgermpc.progs.mimc import mimc_mpc_batch @@ -34,7 +34,10 @@ # All iterations take around 30min total. -@mark.parametrize("batch_size", [10 ** i for i in range(4)]) +@mark.parametrize( + "batch_size", + [param(10 ** i, marks=mark.skip_bench) if i == 0 else 10 ** i for i in range(4)], +) def test_benchmark_mimc_mpc_batch(batch_size, benchmark_runner): async def _prog(context): xs = [context.preproc.get_rand(context) for _ in range(batch_size)] diff --git a/benchmark/test_benchmark_polynomial.py b/benchmark/test_benchmark_polynomial.py index b73d5771..539d449d 100644 --- a/benchmark/test_benchmark_polynomial.py +++ b/benchmark/test_benchmark_polynomial.py @@ -1,6 +1,6 @@ from random import randint -from pytest import mark +from pytest import mark, param from honeybadgermpc.ntl import fft_interpolate, lagrange_interpolate from honeybadgermpc.polynomial import get_omega @@ -19,26 +19,38 @@ def get_points(n, galois_field): return x, y, points, omega -@mark.parametrize("n", [2 ** i for i in range(4, 11, 2)]) +@mark.parametrize( + "n", + [param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 11, 2)], +) def test_benchmark_lagrange_interpolate_python(benchmark, n, galois_field, polynomial): _, _, points, _ = get_points(n, galois_field) benchmark(polynomial.interpolate, points) -@mark.parametrize("n", [2 ** i for i in range(4, 11, 2)]) +@mark.parametrize( + "n", + [param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 11, 2)], +) def test_benchmark_lagrange_interpolate_cpp(benchmark, n, galois_field): x, y, _, _ = get_points(n, galois_field) p = galois_field.modulus benchmark(lagrange_interpolate, x, y, p) -@mark.parametrize("n", [2 ** i for i in range(4, 21, 4)]) +@mark.parametrize( + "n", + [param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 21, 4)], +) def test_benchmark_fft_interpolate_python(benchmark, n, galois_field, polynomial): _, y, _, omega = get_points(n, galois_field) benchmark(polynomial.interpolate_fft, y, omega) -@mark.parametrize("n", [2 ** i for i in range(4, 21, 4)]) +@mark.parametrize( + "n", + [param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 21, 4)], +) def test_benchmark_fft_interpolate_cpp(benchmark, n, galois_field, polynomial): _, y, _, omega = get_points(n, galois_field) n = len(y) diff --git a/benchmark/test_benchmark_preprocessing.py b/benchmark/test_benchmark_preprocessing.py index 664ae98b..04509793 100644 --- a/benchmark/test_benchmark_preprocessing.py +++ b/benchmark/test_benchmark_preprocessing.py @@ -1,16 +1,21 @@ -from pytest import mark +from pytest import mark, param from honeybadgermpc.preprocessing import PreProcessedElements -@mark.parametrize("n,t,k", [(4, 1, 
1024), (16, 5, 512), (50, 15, 256)]) +@mark.parametrize( + "n,t,k", [param(4, 1, 1024, marks=mark.skip_bench), (16, 5, 512), (50, 15, 256)] +) def test_benchmark_generate_rands(benchmark, n, t, k): pp_elements = PreProcessedElements() pp_elements.clear_preprocessing() benchmark(pp_elements.generate_rands, k, n, t) -@mark.parametrize("n,t,k,z", [(4, 1, 64, 64), (16, 5, 32, 32), (61, 20, 32, 32)]) +@mark.parametrize( + "n,t,k,z", + [param(4, 1, 64, 64, marks=mark.skip_bench), (16, 5, 32, 32), (61, 20, 32, 32)], +) def test_benchmark_generate_powers(benchmark, n, t, k, z): pp_elements = PreProcessedElements() pp_elements.clear_preprocessing() diff --git a/benchmark/test_benchmark_rbc.py b/benchmark/test_benchmark_rbc.py index f77351bd..0d2ee94f 100644 --- a/benchmark/test_benchmark_rbc.py +++ b/benchmark/test_benchmark_rbc.py @@ -2,7 +2,7 @@ import os from random import randint -from pytest import mark +from pytest import mark, param from honeybadgermpc.broadcast.reliablebroadcast import reliablebroadcast @@ -10,7 +10,7 @@ @mark.parametrize( "t, msglen", [ - (1, 200), + param(1, 200, marks=mark.skip_bench), (1, 10000), (3, 200), (3, 10000), @@ -45,7 +45,7 @@ def _prog(): @mark.parametrize( "t, msglen", [ - (1, 200), + param(1, 200, marks=mark.skip_bench), (1, 10000), (3, 200), (3, 10000), diff --git a/benchmark/test_benchmark_reed_solomon.py b/benchmark/test_benchmark_reed_solomon.py index f5eef932..f89f73c3 100644 --- a/benchmark/test_benchmark_reed_solomon.py +++ b/benchmark/test_benchmark_reed_solomon.py @@ -1,6 +1,6 @@ from random import randint -from pytest import mark +from pytest import mark, param from honeybadgermpc.elliptic_curve import Subgroup from honeybadgermpc.field import GF @@ -8,7 +8,9 @@ from honeybadgermpc.reed_solomon import GaoRobustDecoder -@mark.parametrize("t", [1, 3, 5, 10, 25, 33, 50, 100, 256]) +@mark.parametrize( + "t", [param(1, marks=mark.skip_bench), 3, 5, 10, 25, 33, 50, 100, 256] +) def test_benchmark_gao_robust_decode(benchmark, t, galois_field): n = 3 * t + 1 galois_field = GF(Subgroup.BLS12_381) @@ -34,11 +36,13 @@ def test_benchmark_gao_robust_decode(benchmark, t, galois_field): # assert set(faults) == set(decoded_faults) -@mark.parametrize("t", [1, 3, 5, 10, 25, 33, 50, 100, 256]) +@mark.parametrize( + "t", [param(1, marks=mark.skip_bench), 3, 5, 10, 25, 33, 50, 100, 256] +) def test_benchmark_gao_robust_decode_fft(benchmark, t, galois_field): n = 3 * t + 1 galois_field = GF(Subgroup.BLS12_381) - point = EvalPoint(galois_field, n, use_fft=True) + point = EvalPoint(galois_field, n, use_omega_powers=True) omega = point.omega.value p = galois_field.modulus dec = GaoRobustDecoder(t, point) diff --git a/benchmark/test_benchmark_refinement.py b/benchmark/test_benchmark_refinement.py index 393d59fa..28c0c8c2 100644 --- a/benchmark/test_benchmark_refinement.py +++ b/benchmark/test_benchmark_refinement.py @@ -1,9 +1,9 @@ -from pytest import mark +from pytest import mark, param from honeybadgermpc.progs.random_refinement import refine_randoms -@mark.parametrize("n", [4, 10, 16, 50, 100]) +@mark.parametrize("n", [param(4, marks=mark.skip_bench), 10, 16, 50, 100]) def test_benchmark_random_refinement(benchmark, n, galois_field): t = (n - 1) // 3 random_shares_int = [galois_field.random().value for _ in range(n)] diff --git a/pytest.ini b/pytest.ini index fcda9782..97e1b347 100644 --- a/pytest.ini +++ b/pytest.ini @@ -8,5 +8,8 @@ log_file_level = DEBUG norecursedirs = charm benchmark # depends on pytest-env plugin () -env = +env = PYTHONASYNCIODEBUG=1 + +markers = + 
skip_bench: mark a test to be run when benchmarking is skipped (select with '-m skip_bench' and deselect with '-m "not skip_bench"')
diff --git a/tests/fixtures.py b/tests/fixtures.py
index d1f59c1d..477ad957 100644
--- a/tests/fixtures.py
+++ b/tests/fixtures.py
@@ -216,6 +216,6 @@ def _setup():
         def _work():
             loop.run_until_complete(program_runner.join())
 
-        benchmark(_work, setup=_setup)
+        benchmark.pedantic(_work, setup=_setup)
 
     return _benchmark_runner
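
Note on the final `tests/fixtures.py` hunk: the plain `benchmark(...)` call
forwards extra keyword arguments to the benchmarked callable, so `setup=_setup`
would simply have been passed through to `_work()`. `benchmark.pedantic()` is
the pytest-benchmark entry point that actually accepts a `setup` callable. A
stand-alone sketch of the same pattern (the test name and data below are
invented for illustration):

```python
def test_benchmark_pedantic_with_setup(benchmark):
    state = {}

    def _setup():
        # Runs before each round and is not included in the timings.
        state["data"] = list(range(1000))

    def _work():
        sum(state["data"])

    # With --benchmark-disable, _work still executes once without stats being
    # collected, which is what the logic-only CI job relies on.
    benchmark.pedantic(_work, setup=_setup, rounds=5)
```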