Run benchmark tests "logic only" on Travis CI
The benchmark tests are run only once without reports.
sbellem committed May 20, 2020
1 parent f2a9ac5 commit 2821aa0
Showing 14 changed files with 148 additions and 30 deletions.
15 changes: 15 additions & 0 deletions .ci/run-benchmark-tests-logic-only.sh
@@ -0,0 +1,15 @@
#!/bin/bash

set -ev

if [ -z $1 ]; then
    tag=$IMAGE_TAG
else
    tag=$1
fi

docker run -it $tag \
    pytest --verbose \
        --benchmark-disable \
        -m skip_bench \
        benchmark/
3 changes: 3 additions & 0 deletions .gitignore
@@ -141,3 +141,6 @@ sharedata/

.ci/deploy_key
.ci/deploy_key.pub

# benchmark
.benchmarks
5 changes: 5 additions & 0 deletions .travis.yml
@@ -54,6 +54,11 @@ jobs:
install: .ci/pull-or-build-image.sh
script: .ci/run-tests.sh
after_success: .ci/upload-coverage-report.sh
- name: Benchmark Tests - logic only
env:
- SETUP_EXTRAS: "tests"
install: .ci/pull-or-build-image.sh
script: .ci/run-benchmark-tests-logic-only.sh
- if: |
type = pull_request OR \
repo != initc3/HoneyBadgerMPC OR \
61 changes: 60 additions & 1 deletion benchmark/README.md
@@ -3,4 +3,63 @@
* Add a file in this folder with the prefix `test_benchmark` in the name.
* Follow the benchmarks already written in this folder for how to structure benchmark code (a minimal sketch follows this list). You can also refer to the [official documentation](https://pytest-benchmark.readthedocs.io/en/latest/).
* To run all benchmarks: `pytest -v benchmark`
* To run a single benchmark: `pytest -v benchmark -k <benchmark_method_name>`
* To run a single benchmark: `pytest -v benchmark -k <benchmark_method_name>`
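
A minimal sketch of what such a file might look like (the file name, the `fib` helper, and the parameter values below are made up for illustration; the real benchmarks in this folder exercise the project's own routines):

```python
# benchmark/test_benchmark_example.py -- hypothetical file, for illustration only
from pytest import mark


def fib(n):
    """Toy function to benchmark."""
    return n if n < 2 else fib(n - 1) + fib(n - 2)


@mark.parametrize("n", [10, 20])
def test_benchmark_fib(benchmark, n):
    # pytest-benchmark provides the `benchmark` fixture; it calls the target
    # repeatedly and records timing statistics for the report.
    result = benchmark(fib, n)
    assert result > 0
```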

## Disabling Benchmark Tests
It's possible to run only the logic of the benchmark tests, without collecting
timing statistics, using the
[`--benchmark-disable`](https://pytest-benchmark.readthedocs.io/en/latest/usage.html#commandline-options) option. From the pytest-benchmark documentation:

> Benchmarked functions are only ran once and no stats are reported.
> Use this if you want to run the test but don't do any benchmarking.

Example:

```shell
$ pytest -v --benchmark-disable benchmark/
```

## Reducing Parametrization
Many tests are parametrized so that the same logic is executed multiple
times with different parameter values. To run a parametrized test for
only one set of parameter values, i.e. to run it just once, the custom
`pytest` marker `skip_bench` is available. To select only the parameter
sets that carry this marker, use the option `-m skip_bench`:

```shell
$ pytest -v -m skip_bench benchmark/
```

For instance, without `-m skip_bench`:

```shell
$ pytest -v benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft

benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[1]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[3]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[5]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[10]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[25]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[33]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[50]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[100]
benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[256]
```

and with `-m skip_bench`:

```shell
pytest -v -m skip_bench benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft

benchmark/test_benchmark_reed_solomon.py::test_benchmark_gao_robust_decode_fft[1]
```
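
The marker is attached to individual parameter sets with `pytest.param`. The pattern below is condensed from `benchmark/test_benchmark_reed_solomon.py` as changed in this commit (test body elided):

```python
from pytest import mark, param


@mark.parametrize(
    # Only the smallest parameter set carries the skip_bench marker,
    # so `-m skip_bench` selects just that one case.
    "t", [param(1, marks=mark.skip_bench), 3, 5, 10, 25, 33, 50, 100, 256]
)
def test_benchmark_gao_robust_decode(benchmark, t, galois_field):
    ...
```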

## Logic-Only Benchmark Test Execution
To check only that the benchmark tests run properly, without
benchmarking them and without running every set of parameter values,
use both `--benchmark-disable` and `-m skip_bench`:

```shell
$ pytest -v --benchmark-disable -m skip_bench benchmark/
```
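
On Travis CI, the `.ci/run-benchmark-tests-logic-only.sh` script added in this commit runs this same combination inside the project's Docker image, roughly equivalent to (`<image-tag>` is a placeholder for the tag the CI build uses):

```shell
$ docker run -it <image-tag> \
    pytest --verbose --benchmark-disable -m skip_bench benchmark/
```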
10 changes: 5 additions & 5 deletions benchmark/test_benchmark_hbavss.py
@@ -2,7 +2,7 @@
from contextlib import ExitStack
from random import randint

from pytest import mark
from pytest import mark, param

from honeybadgermpc.betterpairing import G1, ZR
from honeybadgermpc.elliptic_curve import Subgroup
@@ -24,7 +24,7 @@ def get_avss_params(n, t):
@mark.parametrize(
"t, k",
[
(1, 5),
param(1, 5, marks=mark.skip_bench),
(3, 5),
(5, 5),
(16, 5),
@@ -70,7 +70,7 @@ def _prog():
@mark.parametrize(
"t, k",
[
(1, 5),
param(1, 5, marks=mark.skip_bench),
(3, 5),
(5, 5),
(16, 5),
@@ -113,7 +113,7 @@ def _prog():
@mark.parametrize(
"t, k",
[
(1, 5),
param(1, 5, marks=mark.skip_bench),
(3, 5),
(5, 5),
(16, 5),
@@ -160,7 +160,7 @@ def _prog():
@mark.parametrize(
"t, k",
[
(1, 5),
param(1, 5, marks=mark.skip_bench),
(3, 5),
(5, 5),
(16, 5),
15 changes: 12 additions & 3 deletions benchmark/test_benchmark_jubjub.py
@@ -1,4 +1,4 @@
from pytest import mark
from pytest import mark, param

from honeybadgermpc.elliptic_curve import Jubjub, Point
from honeybadgermpc.progs.jubjub import SharedPoint, share_mul
@@ -32,7 +32,11 @@
TEST_CURVE,
) # noqa: E501

ALL_BIT_NUMBERS = [int(f"0b{'1' * i}", 2) for i in [1, 64, 128]]

ALL_BIT_NUMBERS = [
param(int(f"0b{'1' * i}", 2), marks=(mark.skip_bench if i == 1 else []))
for i in [1, 64, 128]
]

n, t = 4, 1
k = 50000
@@ -45,6 +49,7 @@ def run_benchmark(
runner(prog, n, t, preprocessing, k, mixins)


@mark.skip_bench
def test_benchmark_shared_point_add(benchmark_runner):
async def _prog(context):
result = SharedPoint.from_point(context, TEST_POINT)
@@ -54,6 +59,7 @@ async def _prog(context):
run_benchmark(benchmark_runner, _prog)


@mark.skip_bench
def test_benchmark_shared_point_double(benchmark_runner):
async def _prog(context):
result = SharedPoint.from_point(context, TEST_POINT)
@@ -87,7 +93,10 @@ async def _prog(context):
run_benchmark(benchmark_runner, _prog)


@mark.parametrize("bit_length", list(range(64, 257, 64)))
@mark.parametrize(
"bit_length",
[param(i, marks=mark.skip_bench) if i == 64 else i for i in range(64, 257, 64)],
)
def test_benchmark_share_mul(bit_length, benchmark_runner):
p = TEST_POINT

7 changes: 5 additions & 2 deletions benchmark/test_benchmark_mimc.py
@@ -1,6 +1,6 @@
from random import randint

from pytest import mark
from pytest import mark, param

from honeybadgermpc.elliptic_curve import Jubjub
from honeybadgermpc.progs.mimc import mimc_mpc_batch
@@ -34,7 +34,10 @@


# All iterations take around 30min total.
@mark.parametrize("batch_size", [10 ** i for i in range(4)])
@mark.parametrize(
"batch_size",
[param(10 ** i, marks=mark.skip_bench) if i == 0 else 10 ** i for i in range(4)],
)
def test_benchmark_mimc_mpc_batch(batch_size, benchmark_runner):
async def _prog(context):
xs = [context.preproc.get_rand(context) for _ in range(batch_size)]
22 changes: 17 additions & 5 deletions benchmark/test_benchmark_polynomial.py
@@ -1,6 +1,6 @@
from random import randint

from pytest import mark
from pytest import mark, param

from honeybadgermpc.ntl import fft_interpolate, lagrange_interpolate
from honeybadgermpc.polynomial import get_omega
@@ -19,26 +19,38 @@ def get_points(n, galois_field):
return x, y, points, omega


@mark.parametrize("n", [2 ** i for i in range(4, 11, 2)])
@mark.parametrize(
"n",
[param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 11, 2)],
)
def test_benchmark_lagrange_interpolate_python(benchmark, n, galois_field, polynomial):
_, _, points, _ = get_points(n, galois_field)
benchmark(polynomial.interpolate, points)


@mark.parametrize("n", [2 ** i for i in range(4, 11, 2)])
@mark.parametrize(
"n",
[param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 11, 2)],
)
def test_benchmark_lagrange_interpolate_cpp(benchmark, n, galois_field):
x, y, _, _ = get_points(n, galois_field)
p = galois_field.modulus
benchmark(lagrange_interpolate, x, y, p)


@mark.parametrize("n", [2 ** i for i in range(4, 21, 4)])
@mark.parametrize(
"n",
[param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 21, 4)],
)
def test_benchmark_fft_interpolate_python(benchmark, n, galois_field, polynomial):
_, y, _, omega = get_points(n, galois_field)
benchmark(polynomial.interpolate_fft, y, omega)


@mark.parametrize("n", [2 ** i for i in range(4, 21, 4)])
@mark.parametrize(
"n",
[param(2 ** i, marks=(mark.skip_bench if i == 4 else [])) for i in range(4, 21, 4)],
)
def test_benchmark_fft_interpolate_cpp(benchmark, n, galois_field, polynomial):
_, y, _, omega = get_points(n, galois_field)
n = len(y)
11 changes: 8 additions & 3 deletions benchmark/test_benchmark_preprocessing.py
@@ -1,16 +1,21 @@
from pytest import mark
from pytest import mark, param

from honeybadgermpc.preprocessing import PreProcessedElements


@mark.parametrize("n,t,k", [(4, 1, 1024), (16, 5, 512), (50, 15, 256)])
@mark.parametrize(
"n,t,k", [param(4, 1, 1024, marks=mark.skip_bench), (16, 5, 512), (50, 15, 256)]
)
def test_benchmark_generate_rands(benchmark, n, t, k):
pp_elements = PreProcessedElements()
pp_elements.clear_preprocessing()
benchmark(pp_elements.generate_rands, k, n, t)


@mark.parametrize("n,t,k,z", [(4, 1, 64, 64), (16, 5, 32, 32), (61, 20, 32, 32)])
@mark.parametrize(
"n,t,k,z",
[param(4, 1, 64, 64, marks=mark.skip_bench), (16, 5, 32, 32), (61, 20, 32, 32)],
)
def test_benchmark_generate_powers(benchmark, n, t, k, z):
pp_elements = PreProcessedElements()
pp_elements.clear_preprocessing()
6 changes: 3 additions & 3 deletions benchmark/test_benchmark_rbc.py
@@ -2,15 +2,15 @@
import os
from random import randint

from pytest import mark
from pytest import mark, param

from honeybadgermpc.broadcast.reliablebroadcast import reliablebroadcast


@mark.parametrize(
"t, msglen",
[
(1, 200),
param(1, 200, marks=mark.skip_bench),
(1, 10000),
(3, 200),
(3, 10000),
@@ -45,7 +45,7 @@ def _prog():
@mark.parametrize(
"t, msglen",
[
(1, 200),
param(1, 200, marks=mark.skip_bench),
(1, 10000),
(3, 200),
(3, 10000),
12 changes: 8 additions & 4 deletions benchmark/test_benchmark_reed_solomon.py
@@ -1,14 +1,16 @@
from random import randint

from pytest import mark
from pytest import mark, param

from honeybadgermpc.elliptic_curve import Subgroup
from honeybadgermpc.field import GF
from honeybadgermpc.polynomial import EvalPoint, polynomials_over
from honeybadgermpc.reed_solomon import GaoRobustDecoder


@mark.parametrize("t", [1, 3, 5, 10, 25, 33, 50, 100, 256])
@mark.parametrize(
"t", [param(1, marks=mark.skip_bench), 3, 5, 10, 25, 33, 50, 100, 256]
)
def test_benchmark_gao_robust_decode(benchmark, t, galois_field):
n = 3 * t + 1
galois_field = GF(Subgroup.BLS12_381)
@@ -34,11 +36,13 @@ def test_benchmark_gao_robust_decode(benchmark, t, galois_field):
# assert set(faults) == set(decoded_faults)


@mark.parametrize("t", [1, 3, 5, 10, 25, 33, 50, 100, 256])
@mark.parametrize(
"t", [param(1, marks=mark.skip_bench), 3, 5, 10, 25, 33, 50, 100, 256]
)
def test_benchmark_gao_robust_decode_fft(benchmark, t, galois_field):
n = 3 * t + 1
galois_field = GF(Subgroup.BLS12_381)
point = EvalPoint(galois_field, n, use_fft=True)
point = EvalPoint(galois_field, n, use_omega_powers=True)
omega = point.omega.value
p = galois_field.modulus
dec = GaoRobustDecoder(t, point)
4 changes: 2 additions & 2 deletions benchmark/test_benchmark_refinement.py
@@ -1,9 +1,9 @@
from pytest import mark
from pytest import mark, param

from honeybadgermpc.progs.random_refinement import refine_randoms


@mark.parametrize("n", [4, 10, 16, 50, 100])
@mark.parametrize("n", [param(4, marks=mark.skip_bench), 10, 16, 50, 100])
def test_benchmark_random_refinement(benchmark, n, galois_field):
t = (n - 1) // 3
random_shares_int = [galois_field.random().value for _ in range(n)]
5 changes: 4 additions & 1 deletion pytest.ini
@@ -8,5 +8,8 @@ log_file_level = DEBUG
norecursedirs = charm benchmark

# depends on pytest-env plugin ()
env =
env =
PYTHONASYNCIODEBUG=1

markers =
skip_bench: mark a test to be run when skipping benchmarking (select with '-m skip_bench' and deselect with '-m "not skip_bench"')
2 changes: 1 addition & 1 deletion tests/fixtures.py
@@ -216,6 +216,6 @@ def _setup():
def _work():
loop.run_until_complete(program_runner.join())

benchmark(_work, setup=_setup)
benchmark.pedantic(_work, setup=_setup)

return _benchmark_runner
