Merge pull request #37 from ewanwm/feature_benchmarking

Feature benchmarking
ewanwm · Aug 7, 2024 · a919de8 · a919de8
2 parents be4f82b + 3adf938
commit a919de8
Show file tree

Hide file tree

Showing 5 changed files with 271 additions and 3 deletions.
diff --git a/.github/workflows/benchmarking.yaml b/.github/workflows/benchmarking.yaml
@@ -0,0 +1,82 @@
+name: Continuous Benchmarking
+
+# Builds nuTens with benchmarking enabled and uploads the Google Benchmark
+# results to Bencher for pull requests raised from branches of this repository.
+on:
+  push:
+    branches: [ "main" ]
+
+  pull_request:
+    branches: [ "main" ]
+    types: [opened, reopened, edited, synchronize]
+
+  workflow_dispatch:
+
+env:
+  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
+  BUILD_TYPE: Release
+
+jobs:
+  benchmark_pr_branch:
+    name: Continuous Benchmarking PRs with Bencher
+    # DO NOT REMOVE: For handling Fork PRs see Pull Requests from Forks
+    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
+    permissions:
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: bencherdev/bencher@main
+
+      - name: Set up GCC
+        uses: egor-tensin/setup-gcc@v1
+        with:
+          version: 11
+          platform: x64
+
+      - name: Set Flags
+        # Every `run` step starts a fresh shell, so a plain `export` would be
+        # lost as soon as this step ends. Appending to GITHUB_ENV makes the
+        # variable visible to all later steps of the job.
+        run: echo "USE_CUDA=0" >> "$GITHUB_ENV"
+
+      - name: Install Protobuf
+        # -y keeps the install non-interactive
+        run: sudo apt install -y protobuf-compiler
+
+      - name: Install Python dependencies
+        uses: py-actions/py-dependency-install@v4
+        with:
+          path: "PyTorch_requirements.txt"
+
+      - name: Make Build Directory
+        run: mkdir build
+
+      - name: Configure CMake
+        working-directory: ${{github.workspace}}/build
+        # Configure CMake with benchmarking option on, then print some
+        # diagnostics about what the configure step produced
+        run: |
+          cmake -DNT_USE_PCH=ON -DNT_ENABLE_BENCHMARKING=ON -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} ..
+          echo :::: Build directory post-CMake:
+          ls
+          echo
+          echo :::: _deps directory:
+          ls _deps
+          echo
+          echo :::: benchmark link file:
+          cat benchmarks/CMakeFiles/benchmarks.dir/link.txt
+
+      - name: Build
+        working-directory: ${{github.workspace}}/build
+        # Build your program with the given configuration
+        run: make --trace #cmake --build ${{github.workspace}}/build
+
+      - name: Track PR Benchmarks with Bencher
+        working-directory: ${{github.workspace}}/build
+        # The final quoted argument is the command that Bencher runs and
+        # whose JSON output it parses with the cpp_google adapter
+        run: |
+          ls
+          bencher run \
+          --project nutens \
+          --token '${{ secrets.BENCHER_API_TOKEN }}' \
+          --branch '${{ github.head_ref }}' \
+          --branch-start-point '${{ github.base_ref }}' \
+          --branch-start-point-hash '${{ github.event.pull_request.base.sha }}' \
+          --testbed ubuntu-latest \
+          --adapter cpp_google \
+          --err \
+          --github-actions '${{ secrets.GITHUB_TOKEN }}' \
+          "./benchmarks/benchmarks --benchmark_format=json --benchmark_repetitions=16 --benchmark_min_warmup_time=60"
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -3,8 +3,8 @@ set(CMAKE_CXX_STANDARD 17)
 
 project(nuTens)
 
+# Need to add some special compile flags to check the code test coverage 
 OPTION(NT_TEST_COVERAGE "produce code coverage reports when running tests" OFF)
-
 IF(NT_TEST_COVERAGE)
     message("Adding flags to check test coverage")
     add_compile_options("--coverage")
@@ -15,11 +15,28 @@ ENDIF()
 
 enable_testing()
 
-# add dependencies
+##########################
+#### add dependencies ####
+##########################
+
 include(cmake/CPM.cmake)
 
 CPMAddPackage("gh:gabime/[email protected]")
 
+# If user wants to enable benchmarking we need to set up google benchmark dependency
+OPTION(NT_ENABLE_BENCHMARKING "enable benchmarking using google benchmark" OFF)
+IF(NT_ENABLE_BENCHMARKING)
+    message("Enabling benchmarking")
+    # Fetch Google Benchmark through CPM. Its own test suite is switched off so
+    # that it is neither built nor registered alongside this project's tests
+    # (enable_testing() has already been called above).
+    CPMAddPackage(
+        GITHUB_REPOSITORY "google/benchmark"
+        VERSION 1.8.5
+        OPTIONS
+            "BENCHMARK_DOWNLOAD_DEPENDENCIES ON"
+            "BENCHMARK_ENABLE_TESTING OFF"
+    )
+ELSE()
+    message("Won't benchmark")
+ENDIF()
+
+
 ## check build times
 ## have this optional as it's not supported on all CMake platforms
 OPTION(NT_BUILD_TIMING "output time to build each target" OFF)
@@ -32,10 +49,19 @@ find_package(Protobuf REQUIRED)
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 
 
+######################################
+#### Go configure the actual code ####
+######################################
+
 add_subdirectory(nuTens)
 add_subdirectory(tests)
 
+IF(NT_ENABLE_BENCHMARKING)
+    add_subdirectory(benchmarks)
+ENDIF()
+
 
+# Print out a handy message to more easily see the config options
 message( STATUS "The following variables have been used to configure the build: " )
 get_cmake_property(_variableNames VARIABLES)
 list (SORT _variableNames)

diff --git a/README.md b/README.md
@@ -40,6 +40,14 @@ Once [nuTens](#nutens) has been built, you can verify your installation by runni
 make test
 ```
 
+## Benchmarking
+nuTens uses [Google's benchmark library](https://github.com/google/benchmark) to perform benchmarking and tracks the results using [Bencher](https://bencher.dev). Each benchmark consists of calculating neutrino oscillations for 1024 batches of 1024 neutrino energies using the standard PMNS formalism in vacuum and in constant density matter:
+
+<p align="center">
+<a href="https://bencher.dev/perf/nutens?key=true&reports_per_page=4&branches_per_page=8&testbeds_per_page=8&benchmarks_per_page=8&plots_per_page=8&reports_page=1&branches_page=1&testbeds_page=1&benchmarks_page=1&plots_page=1&branches=5047790b-f661-476d-855c-2b0fdec44d41&testbeds=11ead677-281d-4ae2-9a13-95e7c86da045&benchmarks=f15bec4b-f28e-4cd3-90ae-c3bb3a1f91f8%2Ca422e3a6-df10-448d-848f-d91b364e5904%2Cbc8283a0-f9ab-452b-b496-db8925cd9f53%2C63a00c8d-d86a-452d-ad4a-e2d8e4e3dde8%2C9ff50688-8fea-42b3-bdd7-94a041737c2e&measures=fc8c0fd1-3b41-4ce7-826c-74843c2ea71c&start_time=1720037681000&end_time=1722629908000&clear=true&tab=branches"><img src="https://api.bencher.dev/v0/projects/nutens/perf/img?branches=5047790b-f661-476d-855c-2b0fdec44d41&testbeds=11ead677-281d-4ae2-9a13-95e7c86da045&benchmarks=f15bec4b-f28e-4cd3-90ae-c3bb3a1f91f8%2Ca422e3a6-df10-448d-848f-d91b364e5904%2Cbc8283a0-f9ab-452b-b496-db8925cd9f53%2C63a00c8d-d86a-452d-ad4a-e2d8e4e3dde8%2C9ff50688-8fea-42b3-bdd7-94a041737c2e&measures=fc8c0fd1-3b41-4ce7-826c-74843c2ea71c&start_time=1720037681000&end_time=1722629908000" title="nuTens Benchmarks" alt="nuTens - Bencher"  width="600"/>
+</a>
+</p>
+
 
 ## Feature Wishlist
 - [x] Support PyTorch in tensor library
@@ -51,7 +59,7 @@ make test
 - [x] Add test coverage checks into CI
 - [x] Integrate linting ( [cpp-linter](https://github.com/cpp-linter)? )
 - [x] Add instrumentation library for benchmarking and profiling
-- [ ] Add suite of benchmarking tests
+- [x] Add suite of benchmarking tests
 - [ ] Integrate benchmarks into CI ( maybe use [hyperfine](https://github.com/sharkdp/hyperfine) and [bencher](https://bencher.dev/) for this? )
 - [ ] Add proper unit tests
 - [ ] Expand CI to include more platforms

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -0,0 +1,3 @@
+
+# Executable that runs the Google Benchmark suite defined in benchmarks.cpp
+add_executable(benchmarks benchmarks.cpp)
+# PRIVATE: nothing links against an executable, so its dependencies stay local
+target_link_libraries(benchmarks
+    PRIVATE
+        benchmark::benchmark
+        benchmark::benchmark_main
+        tensor
+        propagator
+)
diff --git a/benchmarks/benchmarks.cpp b/benchmarks/benchmarks.cpp
@@ -0,0 +1,149 @@
+
+#include <benchmark/benchmark.h>
+#include <cstdlib>
+#include <memory>
+#include <nuTens/propagator/const-density-solver.hpp>
+#include <nuTens/propagator/propagator.hpp>
+#include <nuTens/tensors/tensor.hpp>
+
+// Assemble the PMNS matrix as the product of three (1, 3, 3) complex rotation
+// matrices, one per mixing angle, with the CP phase entering via the theta13 one.
+// Returns the product with gradient tracking switched on.
+Tensor buildPMNS(const Tensor &theta12, const Tensor &theta13, const Tensor &theta23, const Tensor &deltaCP)
+{
+    // --- theta23 rotation: acts on the (1, 2) sub-block ---
+    // gradients are kept off while the elements are written, then enabled
+    Tensor rot23;
+    rot23.zeros({1, 3, 3}, NTdtypes::kComplexFloat).requiresGrad(false);
+    rot23.setValue({0, 0, 0}, 1.0);
+    rot23.setValue({0, 1, 1}, Tensor::cos(theta23));
+    rot23.setValue({0, 1, 2}, Tensor::sin(theta23));
+    rot23.setValue({0, 2, 1}, -Tensor::sin(theta23));
+    rot23.setValue({0, 2, 2}, Tensor::cos(theta23));
+    rot23.requiresGrad(true);
+
+    // --- theta13 rotation: acts on the (0, 2) sub-block and carries deltaCP ---
+    Tensor rot13;
+    rot13.zeros({1, 3, 3}, NTdtypes::kComplexFloat).requiresGrad(false);
+    rot13.setValue({0, 1, 1}, 1.0);
+    rot13.setValue({0, 0, 0}, Tensor::cos(theta13));
+    // exp(-i * deltaCP) multiplies the upper off-diagonal element
+    Tensor phaseConj = Tensor::exp(Tensor::scale(deltaCP, -1.0J));
+    rot13.setValue({0, 0, 2}, Tensor::mul(Tensor::sin(theta13), phaseConj));
+    // exp(+i * deltaCP) multiplies the (negated) lower off-diagonal element
+    Tensor phase = Tensor::exp(Tensor::scale(deltaCP, 1.0J));
+    rot13.setValue({0, 2, 0}, -Tensor::mul(Tensor::sin(theta13), phase));
+    rot13.setValue({0, 2, 2}, Tensor::cos(theta13));
+    rot13.requiresGrad(true);
+
+    // --- theta12 rotation: acts on the (0, 1) sub-block ---
+    Tensor rot12;
+    rot12.zeros({1, 3, 3}, NTdtypes::kComplexFloat).requiresGrad(false);
+    rot12.setValue({0, 2, 2}, 1.0);
+    rot12.setValue({0, 0, 0}, Tensor::cos(theta12));
+    rot12.setValue({0, 0, 1}, Tensor::sin(theta12));
+    rot12.setValue({0, 1, 0}, -Tensor::sin(theta12));
+    rot12.setValue({0, 1, 1}, Tensor::cos(theta12));
+    rot12.requiresGrad(true);
+
+    // PMNS = rot23 * (rot13 * rot12)
+    Tensor mixingMatrix = Tensor::matmul(rot23, Tensor::matmul(rot13, rot12));
+    mixingMatrix.requiresGrad(true);
+
+    return mixingMatrix;
+}
+
+// Run nBatches probability calculations with prop; before each one the first
+// batchSize rows of `energies` are overwritten with freshly drawn random values.
+static void batchedOscProbs(const Propagator &prop, Tensor &energies, int batchSize, int nBatches)
+{
+    int batchesDone = 0;
+    while (batchesDone < nBatches)
+    {
+        // draw a new random energy between 0 and 10000.0 MeV for every row
+        for (int row = 0; row < batchSize; ++row)
+        {
+            const float unitRandom = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
+            energies.setValue({row, 0}, unitRandom * 10000.0);
+        }
+
+        // compute the probabilities and reduce them with sum();
+        // the result is explicitly discarded via the cast to void
+        static_cast<void>(prop.calculateProbs(energies).sum());
+
+        ++batchesDone;
+    }
+}
+
+// Benchmark body for the propagator without a matter solver:
+// state.range(0) is used as the batch size, state.range(1) as the number of batches.
+static void BM_vacuumOscillations(benchmark::State &state)
+{
+    // mixing angles and CP phase: single-element complex tensors
+    Tensor theta12;
+    Tensor theta13;
+    Tensor theta23;
+    Tensor deltaCP;
+    theta12.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.12);
+    theta13.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.13);
+    theta23.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.23);
+    deltaCP.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.5);
+
+    // the three masses, stored as a (1, 3) float tensor
+    Tensor masses;
+    masses.ones({1, 3}, NTdtypes::kFloat).requiresGrad(false);
+    masses.setValue({0, 0}, 0.1);
+    masses.setValue({0, 1}, 0.2);
+    masses.setValue({0, 2}, 0.3);
+
+    // energy buffer with one row per batch entry; refilled inside the timed loop
+    Tensor energies;
+    energies.zeros({state.range(0), 1}, NTdtypes::kFloat).requiresGrad(false);
+
+    // configure the propagator with the mixing matrix and the masses
+    Tensor mixingMatrix = buildPMNS(theta12, theta13, theta23, deltaCP);
+    Propagator propagator(3, 100.0);
+    propagator.setPMNS(mixingMatrix);
+    propagator.setMasses(masses);
+
+    // fixed seed so the same sequence of energies is generated on every run
+    std::srand(123);
+
+    // only the body of this loop is timed
+    for (auto _ : state)
+    {
+        batchedOscProbs(propagator, energies, state.range(0), state.range(1));
+    }
+}
+
+// Benchmark body for the propagator with a ConstDensityMatterSolver attached:
+// state.range(0) is used as the batch size, state.range(1) as the number of batches.
+static void BM_constMatterOscillations(benchmark::State &state)
+{
+    // mixing angles and CP phase: single-element complex tensors
+    Tensor theta12;
+    Tensor theta13;
+    Tensor theta23;
+    Tensor deltaCP;
+    theta12.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.12);
+    theta13.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.13);
+    theta23.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.23);
+    deltaCP.ones({1}, NTdtypes::kComplexFloat).requiresGrad(false).setValue({0}, 0.5);
+
+    // the three masses, stored as a (1, 3) float tensor
+    Tensor masses;
+    masses.ones({1, 3}, NTdtypes::kFloat).requiresGrad(false);
+    masses.setValue({0, 0}, 0.1);
+    masses.setValue({0, 1}, 0.2);
+    masses.setValue({0, 2}, 0.3);
+
+    // energy buffer with one row per batch entry; refilled inside the timed loop
+    Tensor energies;
+    energies.zeros({state.range(0), 1}, NTdtypes::kFloat).requiresGrad(false);
+
+    // configure the propagator: mixing matrix, masses, then the matter solver
+    Tensor mixingMatrix = buildPMNS(theta12, theta13, theta23, deltaCP);
+    Propagator propagator(3, 100.0);
+    std::unique_ptr<BaseMatterSolver> solver = std::make_unique<ConstDensityMatterSolver>(3, 2.6);
+    propagator.setPMNS(mixingMatrix);
+    propagator.setMasses(masses);
+    propagator.setMatterSolver(solver);
+
+    // fixed seed so the same sequence of energies is generated on every run
+    std::srand(123);
+
+    // only the body of this loop is timed
+    for (auto _ : state)
+    {
+        batchedOscProbs(propagator, energies, state.range(0), state.range(1));
+    }
+}
+
+// Register the vacuum benchmark. Args are {state.range(0), state.range(1)},
+// which the benchmark body uses as {batch size, number of batches}: 1024 x 1024.
+BENCHMARK(BM_vacuumOscillations)->Name("Vacuum Oscillations")->Args({1 << 10, 1 << 10});
+
+// Register the constant density matter benchmark with the same arguments
+BENCHMARK(BM_constMatterOscillations)->Name("Const Density Oscillations")->Args({1 << 10, 1 << 10});
+
+// Run the benchmark
+BENCHMARK_MAIN();
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@

		# Executable that runs the Google Benchmark suite defined in benchmarks.cpp
		add_executable(benchmarks benchmarks.cpp)
		# PRIVATE: nothing links against an executable, so its dependencies stay local
		target_link_libraries(benchmarks PRIVATE benchmark::benchmark benchmark::benchmark_main tensor propagator)