diff --git a/.github/workflows/pyrealm_ci.yaml b/.github/workflows/pyrealm_ci.yaml index 05a7d48b..adf7baed 100644 --- a/.github/workflows/pyrealm_ci.yaml +++ b/.github/workflows/pyrealm_ci.yaml @@ -83,3 +83,32 @@ jobs: name: built-docs path: docs/build retention-days: 2 + + validate_profiling_tests: + needs: qa + runs-on: ubuntu-latest + strategy: + fail-fast: false + + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install Poetry + uses: abatilo/actions-poetry@v2.1.6 + with: + poetry-version: 1.8.2 + + - name: Install dependencies and add env vars + run: | + poetry install + + - name: Run the profiling test suite, using time to get peak memory usage + id: profiling + run: poetry run /usr/bin/time -v pytest -m "profiling" + \ No newline at end of file diff --git a/.github/workflows/pyrealm_profiling_after_push.yaml b/.github/workflows/pyrealm_profiling_after_push.yaml deleted file mode 100644 index 864bc1bd..00000000 --- a/.github/workflows/pyrealm_profiling_after_push.yaml +++ /dev/null @@ -1,79 +0,0 @@ -name: Profiling after push - -# This runs when code is pushed to main or develop -on: - push: - branches: [main, develop] - -jobs: - test: - uses: ./.github/workflows/pyrealm_ci.yaml - secrets: inherit - - profiling: - needs: test - runs-on: ubuntu-latest - strategy: - fail-fast: false - - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install Poetry - uses: abatilo/actions-poetry@v2.1.6 - with: - poetry-version: 1.8.2 - - - name: Install dependencies and add env vars - run: | - poetry install - sudo apt-get install graphviz - calculatedSha=$(git rev-parse --short ${{ github.sha }}) - echo "COMMIT_SHORT_SHA=$calculatedSha" >> $GITHUB_ENV - - - name: Run the profiling test suite, using time to get peak memory usage - id: profiling - run: poetry run 
/usr/bin/time -v pytest -m "profiling" --profile-svg - - # This step only runs if the profiling succeeds - - name: Benchmark performance - id: benchmarking - if: ${{steps.profiling.outcome == 'success'}} - run: poetry run python profiling/run_benchmarking.py prof/combined.prof profiling/profiling-database.csv profiling/benchmark-fails.csv ${{ env.COMMIT_SHORT_SHA }} --update-on-pass --plot-path profiling/performance-plot.png - - # This step copies over the updated call graph - - name: Copy call graph - id: update-call-graph - if: always() && steps.benchmarking.outcome != 'skipped' - run: cp prof/combined.svg profiling/call-graph.svg - - # Creates a PR to update the profiling stats if the benchmark passes or fails and - # skips if the profiling step fails. - - name: Create pull request - id: cpr - if: always() && steps.benchmarking.outcome != 'skipped' - uses: peter-evans/create-pull-request@v6 - with: - commit-message: Updating profiling stats for ${{ env.COMMIT_SHORT_SHA }} - title: Updating profiling stats for ${{ env.COMMIT_SHORT_SHA }} - add-paths: profiling/* - body: | - Automatic update of profiling stats. - - - Profiling update for merge of ${{ env.COMMIT_SHORT_SHA }} into ${{ github.head_ref }} - - Updated call graph, benchmark plot and profiling stats database. - - Auto-generated by `pyrealm_profiling.yaml` workflow - - ${{ steps.benchmarking.outcome != 'passed' && 'Benchmarking passed' || 'Benchmarking **failed**: review failures.' 
}} - - - name: Check outputs - if: ${{ steps.cpr.outputs.pull-request-number }} - run: | - echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" - echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" \ No newline at end of file diff --git a/.github/workflows/pyrealm_profiling_on_approve.yaml b/.github/workflows/pyrealm_profiling_on_approve.yaml deleted file mode 100644 index 07ebacc6..00000000 --- a/.github/workflows/pyrealm_profiling_on_approve.yaml +++ /dev/null @@ -1,64 +0,0 @@ -name: Profiling on approve - -# This runs when a pull request is submitted -on: - pull_request_review: - types: [submitted] - -jobs: - test: - if: github.event.review.state == 'APPROVED' - uses: ./.github/workflows/pyrealm_ci.yaml - secrets: inherit - - profiling: - needs: test - runs-on: ubuntu-latest - strategy: - fail-fast: false - - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.ref }} - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install Poetry - uses: abatilo/actions-poetry@v2.1.6 - with: - poetry-version: 1.8.2 - - - name: Install dependencies and add env vars - run: | - poetry install - sudo apt-get install graphviz - calculatedSha=$(git rev-parse --short ${{ github.sha }}) - echo "COMMIT_SHORT_SHA=$calculatedSha" >> $GITHUB_ENV - - - name: Run the profiling test suite, using time to get peak memory usage - id: profiling - run: poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg - - # This step only runs if the profiling succeeds - - name: Benchmark performance - id: benchmarking - if: ${{steps.profiling.outcome == 'success'}} - run: poetry run python profiling/run_benchmarking.py prof/combined.prof profiling/profiling-database.csv profiling/benchmark-fails.csv ${{ env.COMMIT_SHORT_SHA }} --update-on-pass --plot-path profiling/performance-plot.png - - # This step copies over the updated call graph - - name: Copy call graph - id: update-call-graph - if: always() && 
steps.benchmarking.outcome != 'skipped' - run: cp prof/combined.svg profiling/call-graph.svg - - # This runs if the benchmark passes or fails and skips if the profiling step fails. - # The commit message includes [skip actions] to avoid triggering a new round - - uses: stefanzweifel/git-auto-commit-action@v5 - if: always() && steps.benchmarking.outcome != 'skipped' - with: - commit_message: Auto-generated profiling results [skip actions] - file_pattern: 'profiling/*' - branch: ${{ github.event.pull_request.head.ref }} diff --git a/.github/workflows/pyrealm_profiling_without_benchmarking.yaml b/.github/workflows/pyrealm_profiling_without_benchmarking.yaml deleted file mode 100644 index 2ff82a63..00000000 --- a/.github/workflows/pyrealm_profiling_without_benchmarking.yaml +++ /dev/null @@ -1,43 +0,0 @@ -name: Profiling without benchmarking - -# This runs when a PR is opened, synchronised or reopened -on: - pull_request: - types: [opened, synchronize, reopened] - workflow_call: - -jobs: - test: - uses: ./.github/workflows/pyrealm_ci.yaml - secrets: inherit - - profiling: - needs: test - runs-on: ubuntu-latest - strategy: - fail-fast: false - - steps: - - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install Poetry - uses: abatilo/actions-poetry@v2.1.6 - with: - poetry-version: 1.8.2 - - - name: Install dependencies and add env vars - run: | - poetry install - sudo apt-get install graphviz - calculatedSha=$(git rev-parse --short ${{ github.sha }}) - echo "COMMIT_SHORT_SHA=$calculatedSha" >> $GITHUB_ENV - - - name: Run the profiling test suite, using time to get peak memory usage - id: profiling - run: poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg diff --git a/.gitignore b/.gitignore index 597dc5c0..58b6acb6 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ pyrealm_build_data/eda.py # Profiling prof/ +profiling/profiling-database.csv diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md index 32fadaf1..8edcfb26 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -99,13 +99,30 @@ details on the workflow and process. Even if the code works as expected and passes all our tests, it can still be slow! We use code profiling to work out where time is spent when using `pyrealm` and identify where we can improve performance. We also use benchmarking between `pyrealm` versions to -make sure that changes to the code aren't making it slower. This is run automatically -when new code is pulled to the `develop` or `main` branches but can also be used to do -local profiling and benchmarking. +make sure that changes to the code aren't making it slower. -See the [profiling and benchmarking -page](https://pyrealm.readthedocs.io/en/latest/development/profiling_and_benchmarking.md) -for more details. +This is currently run manually using the `performance_regression_checking.sh` script in +the `profiling` directory. When this bash script is run without an argument, the current +`HEAD` will be compared to the `origin/develop` branch. Alternatively, the two commits +to be compared can be provided as parameters with `-n` for the "new" commit (the current +HEAD, or code you have changed), and `-o` for the "old" commit (the baseline code you +want to compare the performance against). The code will fail with an error message if the +new performance is more than 5% slower than the baseline, otherwise it will succeed with +a message indicating whether the new code is faster or has similar performance. + +```sh +cd profiling +poetry run ./performance_regression_checking.sh +``` + +To look deeper into how much time is spent in each function, the command + +```bash +poetry run pytest --profile-svg -m "profiling" +``` + +will run the profiling tests and generate a call graph and table with this information in +the `profiling` subdirectory. 
### Documentation diff --git a/profiling/performance_regression_checking.sh b/profiling/performance_regression_checking.sh new file mode 100755 index 00000000..c7f06fa5 --- /dev/null +++ b/profiling/performance_regression_checking.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +if [[ $# -eq 0 ]] ; then + echo "No input arguments, comparing HEAD to origin/develop" + new_commit=HEAD + old_commit=origin/develop +else + while getopts n:o: flag + do + case "${flag}" in + n) new_commit=${OPTARG};; + o) old_commit=${OPTARG};; + *) echo "Invalid input argument"; exit 1;; + esac + done +fi + +cd .. +git checkout $new_commit + +# Remember where we start from +current_repo=`pwd` + +#This is the where we want to check the other worktree out to +cmp_repo=$current_repo/../pyrealm_performance_check + +# Adding the worktree +echo "Add worktree" $cmp_repo +git worktree add $cmp_repo $old_commit + +# Go there and activate poetry environment +cd $cmp_repo +poetry install +#source .venv/bin/activate + +# Run the profiling on old commit +echo "Run profiling tests on old commit" +if [[ "$OSTYPE" == "linux-gnu"* ]]; then #Linux + poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg +elif [[ "$OSTYPE" == "darwin"* ]]; then #Mac OS + poetry run /usr/bin/time -l pytest -m "profiling" --profile-svg +fi +if [ "$?" != "0" ]; then + echo "Profiling the current code went wrong." + exit 1 +fi + +# Go back into the current repo and run there +cd $current_repo +poetry install +echo "Run profiling tests on new commit" +if [[ "$OSTYPE" == "linux-gnu"* ]]; then #Linux + poetry run /usr/bin/time -v pytest -m "profiling" --profile-svg +elif [[ "$OSTYPE" == "darwin"* ]]; then #Mac OS + poetry run /usr/bin/time -l pytest -m "profiling" --profile-svg +fi +if [ "$?" != "0" ]; then + echo "Profiling the new code went wrong." 
+ exit 1 +fi + +# Compare the profiling outputs +cd profiling +python -c " +from pathlib import Path +import simple_benchmarking +import pandas as pd +import sys + +prof_path_old = Path('$cmp_repo'+'/prof/combined.prof') +print(prof_path_old) +df_old = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_old) +cumtime_old = (df_old.sum(numeric_only=True)['cumtime']) +print('Old time:', cumtime_old) + +prof_path_new = Path('$current_repo'+'/prof/combined.prof') +print(prof_path_new) +df_new = simple_benchmarking.run_simple_benchmarking(prof_path=prof_path_new) +cumtime_new = (df_new.sum(numeric_only=True)['cumtime']) +print('New time:', cumtime_new) + +if cumtime_old < 0.95*cumtime_new: + print('We got slower. :(') + sys.exit(1) +elif cumtime_new < 0.95*cumtime_old: + print('We got quicker! :)') +else: + print('Times haven\'t changed') +" + +benchmarking_out="$?" + +cd .. +# Remove the working tree for the comparison commit +echo "Clean up" +git worktree remove --force $cmp_repo +git worktree prune + +if [ $benchmarking_out != "0" ]; then + echo "The new code is more than 5% slower than the old one." + exit 1 +fi + +echo "No significant performance regression detected." 
diff --git a/profiling/profiling-database.csv b/profiling/profiling-database.csv deleted file mode 100644 index 5911a78f..00000000 --- a/profiling/profiling-database.csv +++ /dev/null @@ -1,78 +0,0 @@ -ncalls,tottime,tottime_percall,cumtime,cumtime_percall,timestamp,filename,lineno,function,process_id,label,ignore_result,ignore_justification -13,0.432,0.033,0.432,0.033,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,14,calc_ftemp_arrh,pyrealm/pmodel/functions.py:calc_ftemp_arrh,7c3fe42,False, -3,0.283,0.094,0.283,0.094,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,77,calc_ftemp_inst_rd,pyrealm/pmodel/functions.py:calc_ftemp_inst_rd,7c3fe42,False, -3,0.698,0.233,0.886,0.295,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,119,calc_ftemp_inst_vcmax,pyrealm/pmodel/functions.py:calc_ftemp_inst_vcmax,7c3fe42,False, -4,0.372,0.093,0.401,0.1,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,197,calc_ftemp_kphio,pyrealm/pmodel/functions.py:calc_ftemp_kphio,7c3fe42,False, -2,0.152,0.076,0.189,0.094,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,256,calc_gammastar,pyrealm/pmodel/functions.py:calc_gammastar,7c3fe42,False, -2,0.01,0.005,4.142,2.071,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,310,calc_ns_star,pyrealm/pmodel/functions.py:calc_ns_star,7c3fe42,False, -2,0.188,0.094,0.322,0.161,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,354,calc_kmm,pyrealm/pmodel/functions.py:calc_kmm,7c3fe42,False, -2,0.13,0.065,0.13,0.065,2024-05-07T16:26:25,pyrealm/pmodel/functions.py,644,calc_co2_to_ca,pyrealm/pmodel/functions.py:calc_co2_to_ca,7c3fe42,False, -4,0.101,0.025,0.647,0.162,2024-05-07T16:26:25,pyrealm/core/water.py,67,calc_density_h2o_fisher,pyrealm/core/water.py:calc_density_h2o_fisher,7c3fe42,False, -5,0.005,0.001,1.721,0.344,2024-05-07T16:26:25,pyrealm/core/water.py,126,calc_density_h2o,pyrealm/core/water.py:calc_density_h2o,7c3fe42,False, 
-4,3.465,0.866,4.132,1.033,2024-05-07T16:26:25,pyrealm/core/water.py,181,calc_viscosity_h2o,pyrealm/core/water.py:calc_viscosity_h2o,7c3fe42,False, -2,0.179,0.089,0.253,0.126,2024-05-07T16:26:25,pyrealm/pmodel/isotopes.py,42,__init__,pyrealm/pmodel/isotopes.py:CalcCarbonIsotopes.__init__,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/isotopes.py,98,__repr__,pyrealm/pmodel/isotopes.py:CalcCarbonIsotopes.__repr__,7c3fe42,False, -1,0.016,0.016,0.016,0.016,2024-05-07T16:26:25,pyrealm/pmodel/isotopes.py,102,calc_c4_discrimination,pyrealm/pmodel/isotopes.py:CalcCarbonIsotopes.calc_c4_discrimination,7c3fe42,False, -1,0.058,0.058,0.058,0.058,2024-05-07T16:26:25,pyrealm/pmodel/isotopes.py,187,calc_c3_discrimination,pyrealm/pmodel/isotopes.py:CalcCarbonIsotopes.calc_c3_discrimination,7c3fe42,False, -2,0.0,0.0,0.477,0.238,2024-05-07T16:26:25,pyrealm/pmodel/isotopes.py,226,summarize,pyrealm/pmodel/isotopes.py:CalcCarbonIsotopes.summarize,7c3fe42,False, -3,0.267,0.089,1.929,0.643,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,186,__init__,pyrealm/pmodel/pmodel.py:PModel.__init__,7c3fe42,False, -16,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,300,_check_estimated,pyrealm/pmodel/pmodel.py:PModel._check_estimated,7c3fe42,False, -4,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,309,gpp,pyrealm/pmodel/pmodel.py:PModel.gpp,7c3fe42,False, -3,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,316,vcmax,pyrealm/pmodel/pmodel.py:PModel.vcmax,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,322,vcmax25,pyrealm/pmodel/pmodel.py:PModel.vcmax25,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,328,rd,pyrealm/pmodel/pmodel.py:PModel.rd,7c3fe42,False, -3,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,334,jmax,pyrealm/pmodel/pmodel.py:PModel.jmax,7c3fe42,False, 
-2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,340,gs,pyrealm/pmodel/pmodel.py:PModel.gs,7c3fe42,False, -3,0.712,0.237,2.207,0.736,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,358,estimate_productivity,pyrealm/pmodel/pmodel.py:PModel.estimate_productivity,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,443,__repr__,pyrealm/pmodel/pmodel.py:PModel.__repr__,7c3fe42,False, -2,0.0,0.0,0.646,0.323,2024-05-07T16:26:25,pyrealm/pmodel/pmodel.py,456,summarize,pyrealm/pmodel/pmodel.py:PModel.summarize,7c3fe42,False, -3,0.0,0.0,0.885,0.295,2024-05-07T16:26:25,pyrealm/pmodel/jmax_limitation.py,75,__init__,pyrealm/pmodel/jmax_limitation.py:JmaxLimitation.__init__,7c3fe42,False, -3,0.885,0.295,0.885,0.295,2024-05-07T16:26:25,pyrealm/pmodel/jmax_limitation.py,127,wang17,pyrealm/pmodel/jmax_limitation.py:JmaxLimitation.wang17,7c3fe42,False, -2,0.004,0.002,5.456,2.728,2024-05-07T16:26:25,pyrealm/pmodel/pmodel_environment.py,76,__init__,pyrealm/pmodel/pmodel_environment.py:PModelEnvironment.__init__,7c3fe42,False, -4,0.0,0.0,0.803,0.201,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,95,__init__,pyrealm/pmodel/optimal_chi.py:OptimalChiABC.__init__,7c3fe42,False, -4,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,150,_check_requires,pyrealm/pmodel/optimal_chi.py:OptimalChiABC._check_requires,7c3fe42,False, -3,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,248,set_beta,pyrealm/pmodel/optimal_chi.py:OptimalChiPrentice14.set_beta,7c3fe42,False, -4,0.803,0.201,0.803,0.201,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,253,estimate_chi,pyrealm/pmodel/optimal_chi.py:OptimalChiPrentice14.estimate_chi,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,380,set_beta,pyrealm/pmodel/optimal_chi.py:OptimalChiC4.set_beta,7c3fe42,False, 
-1,0.188,0.188,0.216,0.216,2024-05-07T16:26:25,pyrealm/pmodel/optimal_chi.py,385,estimate_chi,pyrealm/pmodel/optimal_chi.py:OptimalChiC4.estimate_chi,7c3fe42,False, -4,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/__init__.py,28,warning_on_one_line,pyrealm/__init__.py:warning_on_one_line,7c3fe42,False, -1,0.1,0.1,0.1,0.1,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,14,convert_gpp_advantage_to_c4_fraction,pyrealm/pmodel/competition.py:convert_gpp_advantage_to_c4_fraction,7c3fe42,False, -1,0.126,0.126,0.144,0.144,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,62,calculate_tree_proportion,pyrealm/pmodel/competition.py:calculate_tree_proportion,7c3fe42,False, -1,0.091,0.091,0.345,0.345,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,182,__init__,pyrealm/pmodel/competition.py:C3C4Competition.__init__,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,245,__repr__,pyrealm/pmodel/competition.py:C3C4Competition.__repr__,7c3fe42,False, -1,0.098,0.098,0.098,0.098,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,249,estimate_isotopic_discrimination,pyrealm/pmodel/competition.py:C3C4Competition.estimate_isotopic_discrimination,7c3fe42,False, -2,0.0,0.0,0.43,0.215,2024-05-07T16:26:25,pyrealm/pmodel/competition.py,289,summarize,pyrealm/pmodel/competition.py:C3C4Competition.summarize,7c3fe42,False, -962,0.013,0.0,0.016,0.0,2024-05-07T16:26:25,pyrealm/core/utilities.py,52,check_input_shapes,pyrealm/core/utilities.py:check_input_shapes,7c3fe42,False, -6,0.168,0.028,1.553,0.259,2024-05-07T16:26:25,pyrealm/core/utilities.py,113,summarize_attrs,pyrealm/core/utilities.py:summarize_attrs,7c3fe42,False, -13,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/core/utilities.py,184,_get_interval_functions,pyrealm/core/utilities.py:_get_interval_functions,7c3fe42,False, -13,0.745,0.057,0.752,0.058,2024-05-07T16:26:25,pyrealm/core/utilities.py,213,bounds_checker,pyrealm/core/utilities.py:bounds_checker,7c3fe42,False, 
-17,1.463,0.086,1.574,0.093,2024-05-07T16:26:25,pyrealm/core/utilities.py,339,evaluate_horner_polynomial,pyrealm/core/utilities.py:evaluate_horner_polynomial,7c3fe42,False, -3,0.019,0.006,0.019,0.006,2024-05-07T16:26:25,pyrealm/pmodel/subdaily.py,39,memory_effect,pyrealm/pmodel/subdaily.py:memory_effect,7c3fe42,False, -1,0.265,0.265,1.222,1.222,2024-05-07T16:26:25,pyrealm/pmodel/subdaily.py,218,__init__,pyrealm/pmodel/subdaily.py:SubdailyPModel.__init__,7c3fe42,False, -1,0.012,0.012,0.012,0.012,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,70,__init__,pyrealm/pmodel/scaler.py:SubdailyScaler.__init__,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,175,_set_times,pyrealm/pmodel/scaler.py:SubdailyScaler._set_times,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,210,set_window,pyrealm/pmodel/scaler.py:SubdailyScaler.set_window,7c3fe42,False, -6,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,309,pad_values,pyrealm/pmodel/scaler.py:SubdailyScaler.pad_values,7c3fe42,False, -6,0.008,0.001,0.008,0.001,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,332,get_window_values,pyrealm/pmodel/scaler.py:SubdailyScaler.get_window_values,7c3fe42,False, -6,0.0,0.0,0.011,0.002,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,370,get_daily_means,pyrealm/pmodel/scaler.py:SubdailyScaler.get_daily_means,7c3fe42,False, -3,0.0,0.0,0.019,0.006,2024-05-07T16:26:25,pyrealm/pmodel/scaler.py,407,fill_daily_to_subdaily,pyrealm/pmodel/scaler.py:SubdailyScaler.fill_daily_to_subdaily,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/__init__.py,20,__init__,pyrealm/__init__.py:ExperimentalFeatureWarning.__init__,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/__init__.py,23,__str__,pyrealm/__init__.py:ExperimentalFeatureWarning.__str__,7c3fe42,False, -1,0.369,0.369,0.369,0.369,2024-05-07T16:26:25,pyrealm/core/pressure.py,10,calc_patm,pyrealm/core/pressure.py:calc_patm,7c3fe42,False, 
-1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/core/calendar.py,89,__post_init__,pyrealm/core/calendar.py:Calendar.__post_init__,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/core/calendar.py,104,__getitem__,pyrealm/core/calendar.py:Calendar.__getitem__,7c3fe42,False, -2,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/core/calendar.py,113,__len__,pyrealm/core/calendar.py:Calendar.__len__,7c3fe42,False, -1,0.154,0.154,0.154,0.154,2024-05-07T16:26:25,pyrealm/core/hygro.py,188,calc_saturation_vapour_pressure_slope,pyrealm/core/hygro.py:calc_saturation_vapour_pressure_slope,7c3fe42,False, -2,0.139,0.069,0.139,0.069,2024-05-07T16:26:25,pyrealm/core/hygro.py,210,calc_enthalpy_vaporisation,pyrealm/core/hygro.py:calc_enthalpy_vaporisation,7c3fe42,False, -1,0.007,0.007,0.232,0.232,2024-05-07T16:26:25,pyrealm/core/hygro.py,227,calc_specific_heat,pyrealm/core/hygro.py:calc_specific_heat,7c3fe42,False, -1,0.058,0.058,0.359,0.359,2024-05-07T16:26:25,pyrealm/core/hygro.py,259,calc_psychrometric_constant,pyrealm/core/hygro.py:calc_psychrometric_constant,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/core/solar.py,26,calc_heliocentric_longitudes,pyrealm/core/solar.py:calc_heliocentric_longitudes,7c3fe42,False, -1,0.275,0.275,1.913,1.913,2024-05-07T16:26:25,pyrealm/splash/evap.py,74,__post_init__,pyrealm/splash/evap.py:DailyEvapFluxes.__post_init__,7c3fe42,False, -463,2.919,0.002,3.019,0.002,2024-05-07T16:26:25,pyrealm/splash/evap.py,109,estimate_aet,pyrealm/splash/evap.py:DailyEvapFluxes.estimate_aet,7c3fe42,False, -1,4.499,4.499,4.616,4.616,2024-05-07T16:26:25,pyrealm/splash/solar.py,71,__post_init__,pyrealm/splash/solar.py:DailySolarFluxes.__post_init__,7c3fe42,False, -1,0.001,0.001,7.056,7.056,2024-05-07T16:26:25,pyrealm/splash/splash.py,58,__init__,pyrealm/splash/splash.py:SplashModel.__init__,7c3fe42,False, 
-1,0.002,0.002,1.69,1.69,2024-05-07T16:26:25,pyrealm/splash/splash.py,110,estimate_initial_soil_moisture,pyrealm/splash/splash.py:SplashModel.estimate_initial_soil_moisture,7c3fe42,False, -463,0.159,0.0,3.362,0.002,2024-05-07T16:26:25,pyrealm/splash/splash.py,224,estimate_daily_water_balance,pyrealm/splash/splash.py:SplashModel.estimate_daily_water_balance,7c3fe42,False, -1,0.027,0.027,1.755,1.755,2024-05-07T16:26:25,pyrealm/splash/splash.py,287,calculate_soil_moisture,pyrealm/splash/splash.py:SplashModel.calculate_soil_moisture,7c3fe42,False, -1,0.212,0.212,1.044,1.044,2024-05-07T16:26:25,pyrealm/core/water.py,12,calc_density_h2o_chen,pyrealm/core/water.py:calc_density_h2o_chen,7c3fe42,False, -1,0.0,0.0,0.0,0.0,2024-05-07T16:26:25,pyrealm/constants/core_const.py,242,__post_init__,pyrealm/constants/core_const.py:CoreConst.__post_init__,7c3fe42,False, diff --git a/profiling/simple_benchmarking.py b/profiling/simple_benchmarking.py new file mode 100644 index 00000000..40fec37c --- /dev/null +++ b/profiling/simple_benchmarking.py @@ -0,0 +1,117 @@ +"""Run profile benchmarking and generate benchmarking graphics.""" + +import datetime +import pstats +import sys +import textwrap +from argparse import ArgumentParser +from io import StringIO +from pathlib import Path + +import pandas as pd + + +def run_simple_benchmarking( + prof_path: Path, + exclude: list[str] = ["{.*", "<.*", "/lib/", "/tests/", "virtualenv", "venv"], +) -> pd.DataFrame: + """Run a simplified benchmarking version. + + The function reads the contents of a ``.prof`` file (typically + ``prof/combined.prof``) generated by running the profiling test suite and returns + the profiling data as a standardised `pandas.DataFrame`. + + The profiling results include a field 'filename:lineno(function)', which identifies + each profiled code object. 
Many of these will be from functions outside of the + `pyrealm` codebase and these are excluded using a list of regex patterns: + + * '/lib/' excludes standard and site packages, + * '{.*' excludes profiling of '{built-in ... } and similar, + * '<.*' excludes profiling of '` and similar. + * '/tests/' excludes the test functions and classes calling the pyrealm code. + * 'virtualenv' and 'venv' exclude standard packages in virtual environments + + Args: + prof_path: Path to the profiling output. + exclude: A list of patterns used to exclude rows from the profiling stats. + """ + + # Import the profile data, write the stats report to a StringIO and seek the start + # to allow the data to be read. The print_stats() explicitly does not filter for + # 'pyrealm' because the string can be found in virtual environment paths and leads + # to inconsistent behaviour across platforms + sio = StringIO() + p = pstats.Stats(str(prof_path), stream=sio) + p.sort_stats(pstats.SortKey.CUMULATIVE).print_stats() + sio.seek(0) + + # Consume lines from the report to find the header row + header_found = False + while not header_found: + header = sio.readline() + if "ncalls" in header: + header_found = True + + # Set replacement non-duplicated headers + column_names = [ + "ncalls", + "tottime", + "tottime_percall", + "cumtime", + "cumtime_percall", + "filename:lineno(function)", + ] + + # Convert to a DataFrame using fixed width format + df = pd.read_fwf(sio, engine="python", names=column_names, infer_nrows=10) + + # Reduce to rows not matching any of the regex exclude patterns + exclude_rows = pd.DataFrame( + [df["filename:lineno(function)"].str.contains(excl) for excl in exclude] + ).any() + df = df[~exclude_rows] + + # Add a timestamp from the file creation date + m_time = datetime.datetime.fromtimestamp(prof_path.stat().st_mtime) + df["timestamp"] = m_time.isoformat(timespec="seconds") + + df.to_csv("profiling-database.csv") + + return df + + +def run_simple_benchmarking_cli() -> None: 
+ """Run the simple benchmarking.""" + + if run_simple_benchmarking_cli.__doc__ is not None: + doc = " " + run_simple_benchmarking_cli.__doc__ + else: + doc = "Python in -OO mode" + + parser = ArgumentParser( + description=textwrap.dedent(doc), + ) + parser.add_argument( + "prof_path", + type=Path, + help="Path to pytest-profiling output", + ) + + args = parser.parse_args() + + # Copy the profiling results to the current folder + if not args.prof_path.exists(): + raise FileNotFoundError(f"Cannot find the profiling file at {args.prof_path}.") + + success = run_simple_benchmarking(prof_path=args.prof_path) + + if not success: + print("Benchmarking failed.") + sys.exit(1) + + print("Benchmarking passed.") + sys.exit(0) + + +if __name__ == "__main__": + run_simple_benchmarking_cli() diff --git a/tests/profiling/pmodel/test_profiling_pmodel.py b/tests/profiling/pmodel/test_profiling_pmodel.py index 64c161ef..488439d4 100644 --- a/tests/profiling/pmodel/test_profiling_pmodel.py +++ b/tests/profiling/pmodel/test_profiling_pmodel.py @@ -28,7 +28,7 @@ def test_profiling_pmodel(pmodel_profile_data): gpp_c3_annual = pmod_c3.gpp * (60 * 60 * 24 * 365) * 1e-6 gpp_c4_annual = pmod_c4.gpp * (60 * 60 * 24 * 365) * 1e-6 - # Fit the competition model - making some extrenely poor judgements about what + # Fit the competition model - making some extremely poor judgements about what # is cropland and what is below the minimum temperature that really should be # fixed. comp = C3C4Competition(