From 06ad0e47c15acd1cd4728749d8ebacac14fbdd4b Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Fri, 20 Sep 2024 16:15:50 -0400 Subject: [PATCH 1/5] Prepare for branch renaming. (#367) * Add `amd-staging` and `amd-mainline` to workflow * Update links in documentation to use amd-mainline --------- Signed-off-by: David Galiffi --- .github/workflows/containers.yml | 2 +- .github/workflows/cpack.yml | 4 ++-- .github/workflows/docs.yml | 4 ++-- .github/workflows/formatting.yml | 4 ++-- .github/workflows/opensuse.yml | 4 ++-- .github/workflows/python.yml | 4 ++-- .github/workflows/redhat.yml | 4 ++-- .github/workflows/ubuntu-focal.yml | 4 ++-- .github/workflows/ubuntu-jammy.yml | 4 ++-- docs/index.rst | 2 +- docs/reference/development-guide.rst | 20 ++++++++++---------- docs/sphinx/_toc.yml.in | 2 +- source/docs/development.md | 20 ++++++++++---------- 13 files changed, 39 insertions(+), 39 deletions(-) diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 6c9cbbefb..db5441d79 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -7,7 +7,7 @@ on: schedule: - cron: 0 5 * * * push: - branches: [main] + branches: [main, amd-mainline] paths: - '.github/workflows/containers.yml' - 'docker/**' diff --git a/.github/workflows/cpack.yml b/.github/workflows/cpack.yml index 4615511be..00f82f042 100644 --- a/.github/workflows/cpack.yml +++ b/.github/workflows/cpack.yml @@ -4,7 +4,7 @@ run-name: cpack on: workflow_dispatch: push: - branches: [main, releases/**] + branches: [main, amd-mainline, release/**] tags: - v* paths-ignore: @@ -12,7 +12,7 @@ on: - 'docs/**' - 'source/docs/**' pull_request: - branches: [main] + branches: [main, amd-mainline] paths: - '.github/workflows/cpack.yml' - 'docker/**' diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c5af45159..4b2340fd3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,14 +3,14 @@ name: Documentation on: 
workflow_dispatch: push: - branches: [main, gh-pages] + branches: [main, amd-mainline, gh-pages] paths: - '*.md' - 'source/docs/**' - '.github/workflows/docs.yml' - 'VERSION' pull_request: - branches: [main] + branches: [main, amd-mainline] paths: - '*.md' - 'source/docs/**' diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index 6e425d7bd..f3e4b7df0 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -4,9 +4,9 @@ run-name: formatting on: push: - branches: [ main, develop ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] pull_request: - branches: [ main, develop ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/.github/workflows/opensuse.yml b/.github/workflows/opensuse.yml index 700f3b33b..ecec47ece 100644 --- a/.github/workflows/opensuse.yml +++ b/.github/workflows/opensuse.yml @@ -3,7 +3,7 @@ run-name: opensuse-15 on: push: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' @@ -15,7 +15,7 @@ on: - '.github/workflows/formatting.yml' - 'docker/**' pull_request: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 068118888..939da19b1 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -3,12 +3,12 @@ run-name: Python on: push: - branches: [ main, develop ] + branches: [ main, develop, amd-mainline, amd-staging ] paths: - 'source/python/gui/*.py' - 'source/python/gui/**/*.py' pull_request: - branches: [ main, develop ] + branches: [ main, develop, amd-mainline, amd-staging ] paths: - 'source/python/gui/*.py' - 'source/python/gui/**/*.py' diff --git a/.github/workflows/redhat.yml 
b/.github/workflows/redhat.yml index 474dac56a..0a3869d80 100644 --- a/.github/workflows/redhat.yml +++ b/.github/workflows/redhat.yml @@ -3,7 +3,7 @@ run-name: redhat on: push: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' @@ -15,7 +15,7 @@ on: - '.github/workflows/formatting.yml' - 'docker/**' pull_request: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' diff --git a/.github/workflows/ubuntu-focal.yml b/.github/workflows/ubuntu-focal.yml index 5a4d53379..5be371a0d 100644 --- a/.github/workflows/ubuntu-focal.yml +++ b/.github/workflows/ubuntu-focal.yml @@ -3,7 +3,7 @@ run-name: ubuntu-focal on: push: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' @@ -15,7 +15,7 @@ on: - '.github/workflows/formatting.yml' - 'docker/**' pull_request: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' diff --git a/.github/workflows/ubuntu-jammy.yml b/.github/workflows/ubuntu-jammy.yml index edaae62e9..a7d1d5331 100644 --- a/.github/workflows/ubuntu-jammy.yml +++ b/.github/workflows/ubuntu-jammy.yml @@ -3,7 +3,7 @@ run-name: ubuntu-jammy on: push: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' @@ -15,7 +15,7 @@ on: - '.github/workflows/formatting.yml' - 'docker/**' pull_request: - branches: [ main, develop, release/** ] + branches: [ main, develop, amd-mainline, amd-staging, release/** ] paths-ignore: - '*.md' - 'docs/**' diff --git a/docs/index.rst b/docs/index.rst index a9f371a68..27a92eb5d 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,7 +30,7 @@ The documentation is structured as follows: 
.. grid-item-card:: Tutorials - * `GitHub examples `_ + * `GitHub examples `_ * :doc:`Video tutorials <./tutorials/video-tutorials>` .. grid-item-card:: How to diff --git a/docs/reference/development-guide.rst b/docs/reference/development-guide.rst index d58c4763c..d04338ede 100644 --- a/docs/reference/development-guide.rst +++ b/docs/reference/development-guide.rst @@ -15,7 +15,7 @@ Executables This section lists the Omnitrace executables. -omnitrace-avail: `source/bin/omnitrace-avail `_ +omnitrace-avail: `source/bin/omnitrace-avail `_ ------------------------------------------------------------------------------------------------------------------------------- The ``main`` routine of ``omnitrace-avail`` has three important sections: @@ -24,7 +24,7 @@ The ``main`` routine of ``omnitrace-avail`` has three important sections: * Printing options * Printing hardware counters -omnitrace-sample: `source/bin/omnitrace-sample `_ +omnitrace-sample: `source/bin/omnitrace-sample `_ ------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``omnitrace-sample -- `` @@ -32,7 +32,7 @@ omnitrace-sample: `source/bin/omnitrace-sample `` and a modified environment -omnitrace-casual: `source/bin/omnitrace-causal `_ +omnitrace-casual: `source/bin/omnitrace-causal `_ ------------------------------------------------------------------------------------------------------------------------------- When there is exactly one causal profiling configuration variant (which enables debugging), @@ -45,7 +45,7 @@ the following actions take place for each variant: * the child process launches `` `` using ``execvpe``, which modifies the environment for the variant * the parent process waits for the child process to finish -omnitrace-instrument: `source/bin/omnitrace-instrument `_ +omnitrace-instrument: `source/bin/omnitrace-instrument `_ 
------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``omnitrace-instrument -- `` @@ -70,31 +70,31 @@ omnitrace-instrument: `source/bin/omnitrace-instrument `_ +Common library: `source/lib/common `_ -------------------------------------------------------------------------------------------------------------------------------- * General header-only functionality used in multiple executables and/or libraries. * Not installed or exported outside of the build tree. -Core library: `source/lib/core `_ +Core library: `source/lib/core `_ -------------------------------------------------------------------------------------------------------------------------------- * Static PIC library with functionality that does not depend on any components. * Not installed or exported outside of the build tree. -Binary library: `source/lib/binary `_ +Binary library: `source/lib/binary `_ -------------------------------------------------------------------------------------------------------------------------------- * Static PIC library with functionality for reading/analyzing binary info. * Mostly used by the causal profiling sections of ``libomnitrace``. * Not installed or exported outside of the build tree. -libomnitrace: `source/lib/omnitrace `_ +libomnitrace: `source/lib/omnitrace `_ -------------------------------------------------------------------------------------------------------------------------------- This is the main library encapsulating all the capabilities. 
-libomnitrace-dl: `source/lib/omnitrace-dl `_ +libomnitrace-dl: `source/lib/omnitrace-dl `_ -------------------------------------------------------------------------------------------------------------------------------- This is a lightweight, front-end library for ``libomnitrace`` which serves three primary purposes: @@ -105,7 +105,7 @@ This is a lightweight, front-end library for ``libomnitrace`` which serves three * Prevents re-entry if ``libomnitrace`` calls an instrumented function internally * Coordinates communication between ``libomnitrace-user`` and ``libomnitrace`` -libomnitrace-user: `source/lib/omnitrace-user `_ +libomnitrace-user: `source/lib/omnitrace-user `_ -------------------------------------------------------------------------------------------------------------------------------- * Provides a set of functions and types for the users to add to their code, for example, diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in index ac2e9834e..586bb6919 100644 --- a/docs/sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -17,7 +17,7 @@ subtrees: - caption: Tutorials entries: - - url: https://github.com/ROCm/omnitrace/tree/main/examples + - url: https://github.com/ROCm/omnitrace/tree/amd-mainline/examples title: GitHub examples - file: tutorials/video-tutorials.rst title: Video tutorials diff --git a/source/docs/development.md b/source/docs/development.md index 45f76a2ae..865d74bb8 100644 --- a/source/docs/development.md +++ b/source/docs/development.md @@ -7,7 +7,7 @@ ## Executables -### omnitrace-avail: [source/bin/omnitrace-avail](https://github.com/ROCm/omnitrace/tree/main/source/bin/omnitrace-avail) +### omnitrace-avail: [source/bin/omnitrace-avail](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/bin/omnitrace-avail) The main of `omnitrace-avail` has three important sections: @@ -15,7 +15,7 @@ The main of `omnitrace-avail` has three important sections: 2. Printing options 3. 
Printing hardware counters -### omnitrace-sample: [source/bin/omnitrace-sample](https://github.com/ROCm/omnitrace/tree/main/source/bin/omnitrace-sample) +### omnitrace-sample: [source/bin/omnitrace-sample](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/bin/omnitrace-sample) General design: @@ -24,7 +24,7 @@ General design: - Adds `libomnitrace-dl.so` to `LD_PRELOAD` - Application is launched via `execvpe` with ` ` and modified environment -### omnitrace-casual: [source/bin/omnitrace-causal](https://github.com/ROCm/omnitrace/tree/main/source/bin/omnitrace-causal) +### omnitrace-casual: [source/bin/omnitrace-causal](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/bin/omnitrace-causal) Nearly identical design to [omnitrace-sample](#omnitrace-sample-sourcebinomnitrace-sample) when there is exactly one causal profiling configuration variant (this enables debugging). @@ -36,7 +36,7 @@ for each variant: - child process launches ` ` via `execvpe` which modified environment for variant - parent process waits for child process to finish -### omnitrace-instrument: [source/bin/omnitrace-instrument](https://github.com/ROCm/omnitrace/tree/main/source/bin/omnitrace-instrument) +### omnitrace-instrument: [source/bin/omnitrace-instrument](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/bin/omnitrace-instrument) - Requires a command-line format of `omnitrace-instrument -- ` - User specifies in options whether they want to do runtime instrumentation, binary rewrite, or attach to process @@ -52,24 +52,24 @@ for each variant: ## Libraries -### Common Library: [source/lib/common](https://github.com/ROCm/omnitrace/tree/main/source/lib/common) +### Common Library: [source/lib/common](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/common) General header-only functionality used in multiple executables and/or libraries. Not installed or exported outside of the build tree. 
-### Core Library: [source/lib/core](https://github.com/ROCm/omnitrace/tree/main/source/lib/core) +### Core Library: [source/lib/core](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/core) Static PIC library with functionality that does not depend on any components. Not installed or exported outside of the build tree. -### Binary Library: [source/lib/binary](https://github.com/ROCm/omnitrace/tree/main/source/lib/binary) +### Binary Library: [source/lib/binary](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/binary) Static PIC library with functionality for reading/analyzing binary info. Mostly used by the causal profiling sections of [libomnitrace](#libomnitrace-sourcelibomnitrace). Not installed or exported outside of the build tree. -### libomnitrace: [source/lib/omnitrace](https://github.com/ROCm/omnitrace/tree/main/source/lib/omnitrace) +### libomnitrace: [source/lib/omnitrace](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/omnitrace) This is the main library encapsulating all the capabilities. -### libomnitrace-dl: [source/lib/omnitrace-dl](https://github.com/ROCm/omnitrace/tree/main/source/lib/omnitrace-dl) +### libomnitrace-dl: [source/lib/omnitrace-dl](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/omnitrace-dl) Lightweight, front-end library for [libomnitrace](#libomnitrace-sourcelibomnitrace) which serves 3 primary purposes: @@ -77,7 +77,7 @@ Lightweight, front-end library for [libomnitrace](#libomnitrace-sourcelibomnitra 2. Prevents re-entry if [libomnitrace](#libomnitrace-sourcelibomnitrace) calls an instrumentated function internally) 3. 
Coordinates communication between [libomnitrace-user](#libomnitrace-user-sourcelibomnitrace-user) and [libomnitrace](#libomnitrace-sourcelibomnitrace) -### libomnitrace-user: [source/lib/omnitrace-user](https://github.com/ROCm/omnitrace/tree/main/source/lib/omnitrace-user) +### libomnitrace-user: [source/lib/omnitrace-user](https://github.com/ROCm/omnitrace/tree/amd-mainline/source/lib/omnitrace-user) Provides a set of functions and types for the users to add to their code, e.g. disabling data collection globally or on a specific thread, user-defined regions, etc. If [libomnitrace-dl](#libomnitrace-dl-sourcelibomnitrace-dl) is not loaded, the user API is effectively no-op From abff23ac4238da6d7891d9ac9f36a919e30bf759 Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Mon, 23 Sep 2024 13:43:16 -0400 Subject: [PATCH 2/5] Update VERSION to 1.12.0 (#387) Bumping version since rocm-rel-6.3.0.1 branched. --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3d0e62313..0eed1a29e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.11.4 +1.12.0 From e60de051d6cef08b0219ed43e982a3708bd6500e Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Tue, 24 Sep 2024 17:44:57 -0400 Subject: [PATCH 3/5] Add rocm tag to cpack.yml (#389) * Add rocm tag to cpack.yml * Update softprops/action-gh-release to v2 --- .github/workflows/cpack.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpack.yml b/.github/workflows/cpack.yml index 00f82f042..a564b6899 100644 --- a/.github/workflows/cpack.yml +++ b/.github/workflows/cpack.yml @@ -6,7 +6,8 @@ on: push: branches: [main, amd-mainline, release/**] tags: - - v* + - "v[1-9].[0-9]+.[0-9]+*" + - "rocm-[1-9].[0-9]+.[0-9]+*" paths-ignore: - '*.md' - 'docs/**' @@ -308,7 +309,7 @@ jobs: done - name: Upload STGZ Release Assets - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 if: startsWith(github.ref, 'refs/tags/') && github.repository 
== 'ROCm/omnitrace' with: fail_on_unmatched_files: True From 3285a533d394e94e0b063dcb30960a3121f2d52d Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Wed, 25 Sep 2024 10:35:47 -0400 Subject: [PATCH 4/5] Include note about known issue with Perfetto v47.0 and above. (#391) --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a251eeaab..9abe9c8cc 100755 --- a/README.md +++ b/README.md @@ -7,6 +7,10 @@ [![Installer Packaging (CPack)](https://github.com/ROCm/omnitrace/actions/workflows/cpack.yml/badge.svg)](https://github.com/ROCm/omnitrace/actions/workflows/cpack.yml) [![Documentation](https://github.com/ROCm/omnitrace/actions/workflows/docs.yml/badge.svg)](https://github.com/ROCm/omnitrace/actions/workflows/docs.yml) +> [!NOTE] +> Perfetto validation is done with trace_processor v46.0, as there is a known issue with v47.0. +> If you are experiencing problems viewing your trace in the latest version of [Perfetto](https://ui.perfetto.dev), then try using [Perfetto UI v46.0](https://ui.perfetto.dev/v46.0-35b3d9845/#!/). + ## Overview AMD Research is seeking to improve observability and performance analysis for software running on AMD heterogeneous systems. From 17ff1dc31d13ec5e735485dd2c775da4547c994b Mon Sep 17 00:00:00 2001 From: David Galiffi Date: Wed, 25 Sep 2024 22:34:46 -0400 Subject: [PATCH 5/5] Reference "Known Issue" next to references of Perfetto.
(#393) * And provide workaround link to Perfetto v46.0 Co-authored-by: Peter Park --- .../how-to/understanding-omnitrace-output.rst | 62 +++++++------ docs/install/install.rst | 93 ++++++++++--------- docs/reference/development-guide.rst | 8 +- docs/what-is-omnitrace.rst | 13 ++- 4 files changed, 96 insertions(+), 80 deletions(-) diff --git a/docs/how-to/understanding-omnitrace-output.rst b/docs/how-to/understanding-omnitrace-output.rst index b7301f414..08d560992 100644 --- a/docs/how-to/understanding-omnitrace-output.rst +++ b/docs/how-to/understanding-omnitrace-output.rst @@ -28,7 +28,7 @@ For example, starting with the following base configuration: [omnitrace] Outputting 'omnitrace-example-output/wall-clock.txt'... [omnitrace] Outputting 'omnitrace-example-output/wall-clock.json'... -If the ``OMNITRACE_USE_PID`` option is enabled, then running a non-MPI executable +If the ``OMNITRACE_USE_PID`` option is enabled, then running a non-MPI executable with a PID of ``63453`` results in the following output: .. code-block:: shell @@ -58,7 +58,7 @@ Metadata ======================================== Omnitrace outputs a ``metadata.json`` file. This metadata file contains -information about the settings, environment variables, output files, and info +information about the settings, environment variables, output files, and info about the system and the run, as follows: * Hardware cache sizes @@ -240,14 +240,14 @@ Metadata JSON Sample Configuring the Omnitrace output ======================================== -Omnitrace includes a core set of options for controlling the format +Omnitrace includes a core set of options for controlling the format and contents of the output files. For additional information, see the guide on :doc:`configuring runtime options <./configuring-runtime-options>`. Core configuration settings ----------------------------------- -.. csv-table:: +.. 
csv-table:: :header: "Setting", "Value", "Description" :widths: 30, 30, 100 @@ -261,20 +261,20 @@ Core configuration settings Output prefix keys ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Output prefix keys have many uses but are most helpful when dealing with multiple +Output prefix keys have many uses but are most helpful when dealing with multiple profiling runs or large MPI jobs. -They are included in Omnitrace because they were introduced into Timemory +They are included in Omnitrace because they were introduced into Timemory for `compile-time-perf `_. -They are needed to create different output files for a generic wrapper around +They are needed to create different output files for a generic wrapper around compilation commands while still overwriting the output from the last time a file was compiled. -When doing scaling studies and specifying options via the command line, +When doing scaling studies and specifying options via the command line, the recommended process is to use a common ``OMNITRACE_OUTPUT_PATH``, disable ``OMNITRACE_TIME_OUTPUT``, set ``OMNITRACE_OUTPUT_PREFIX="%argt%-"``, and let Omnitrace cleanly organize the output. -.. csv-table:: +.. csv-table:: :header: "String", "Encoding" :widths: 20, 120 @@ -311,16 +311,22 @@ set ``OMNITRACE_OUTPUT_PREFIX="%argt%-"``, and let Omnitrace cleanly organize th .. note:: In any output prefix key which contains a ``/`` character, the ``/`` characters - are replaced with ``_`` and any leading underscores are stripped. For example, - an ``%arg0%`` of ``/usr/bin/foo`` translates to ``usr_bin_foo``. Additionally, any ``%arg%`` keys which + are replaced with ``_`` and any leading underscores are stripped. For example, + an ``%arg0%`` of ``/usr/bin/foo`` translates to ``usr_bin_foo``. Additionally, any ``%arg%`` keys which do not have a command line argument at position ```` are ignored. 
Perfetto output ======================================== -Use the ``OMNITRACE_OUTPUT_FILE`` to specify a specific location. If this is an +Use the ``OMNITRACE_OUTPUT_FILE`` to specify a specific location. If this is an absolute path, then all ``OMNITRACE_OUTPUT_PATH`` and similar -settings are ignored. Visit `ui.perfetto.dev `_ and open this file. +settings are ignored. Visit `ui.perfetto.dev `_ and open +this file. + +.. important:: + Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. + If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, + then try using `Perfetto UI v46.0 `_. .. image:: ../data/omnitrace-perfetto.png :alt: Visualization of a performance graph in Perfetto @@ -349,20 +355,20 @@ Use ``omnitrace-avail --components --filename`` to view the base filename for ea | sampling_wall_clock | true | sampling_wall_clock | |---------------------------------|---------------|------------------------| -The ``OMNITRACE_COLLAPSE_THREADS`` and ``OMNITRACE_COLLAPSE_PROCESSES`` settings are -only valid when full `MPI support is enabled <../install/install.html#mpi-support-within-omnitrace>`_. -When they are set, Timemory combines the per-thread and per-rank data (respectively) of +The ``OMNITRACE_COLLAPSE_THREADS`` and ``OMNITRACE_COLLAPSE_PROCESSES`` settings are +only valid when full `MPI support is enabled <../install/install.html#mpi-support-within-omnitrace>`_. +When they are set, Timemory combines the per-thread and per-rank data (respectively) of identical call stacks. -The ``OMNITRACE_FLAT_PROFILE`` setting removes all call stack hierarchy. +The ``OMNITRACE_FLAT_PROFILE`` setting removes all call stack hierarchy. Using ``OMNITRACE_FLAT_PROFILE=ON`` in combination -with ``OMNITRACE_COLLAPSE_THREADS=ON`` is a useful configuration for identifying +with ``OMNITRACE_COLLAPSE_THREADS=ON`` is a useful configuration for identifying min/max measurements regardless of the calling context. 
-The ``OMNITRACE_TIMELINE_PROFILE`` setting (with ``OMNITRACE_FLAT_PROFILE=OFF``) effectively +The ``OMNITRACE_TIMELINE_PROFILE`` setting (with ``OMNITRACE_FLAT_PROFILE=OFF``) effectively generates similar data to that found -in Perfetto. Enabling timeline and flat profiling effectively generates +in Perfetto. Enabling timeline and flat profiling effectively generates similar data to ``strace``. However, while Timemory generally -requires significantly less memory than Perfetto, this is not the case in timeline +requires significantly less memory than Perfetto, this is not the case in timeline mode, so use this setting with caution. Timemory text output @@ -381,11 +387,11 @@ The truncation settings be changed through the ``OMNITRACE_MAX_WIDTH`` setting. Timemory text output example ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In the following example, the ``NN`` field in ``|NN>>>`` is the thread ID. If MPI support is enabled, +In the following example, the ``NN`` field in ``|NN>>>`` is the thread ID. If MPI support is enabled, this becomes ``|MM|NN>>>`` where ``MM`` is the rank. -If ``OMNITRACE_COLLAPSE_THREADS=ON`` and ``OMNITRACE_COLLAPSE_PROCESSES=ON`` are configured, +If ``OMNITRACE_COLLAPSE_THREADS=ON`` and ``OMNITRACE_COLLAPSE_PROCESSES=ON`` are configured, neither the ``MM`` nor the ``NN`` are present unless the -component explicitly sets type traits. Type traits specify that the data is only +component explicitly sets type traits. Type traits specify that the data is only relevant per-thread or per-process, such as the ``thread_cpu_clock`` clock component. .. 
code-block:: shell @@ -573,15 +579,15 @@ relevant per-thread or per-process, such as the ``thread_cpu_clock`` clock compo Timemory JSON output ------------------------------------------------------------------------- -Timemory represents the data within the JSON output in two forms: +Timemory represents the data within the JSON output in two forms: a flat structure and a hierarchical structure. The flat JSON data represents the data similar to the text files, where the hierarchical information is represented by the indentation of the ``prefix`` field and the ``depth`` field. -The hierarchical JSON contains additional information with respect +The hierarchical JSON contains additional information with respect to inclusive and exclusive values. However, its structure must be processed using recursion. This section of the JSON output supports analysis by `hatchet `_. -All the data entries for the flat structure are in a single JSON array. It is easier to +All the data entries for the flat structure are in a single JSON array. It is easier to write a simple Python script for post-processing using this format than with the hierarchical structure. .. note:: @@ -929,7 +935,7 @@ Timemory JSON output Python post-processing example ) ) -The result of applying this script to the corresponding JSON output from the :ref:`text-output-example-label` +The result of applying this script to the corresponding JSON output from the :ref:`text-output-example-label` section is as follows: .. 
code-block:: shell diff --git a/docs/install/install.rst b/docs/install/install.rst index f0ee1662c..8973e5ed1 100644 --- a/docs/install/install.rst +++ b/docs/install/install.rst @@ -18,8 +18,8 @@ Release links To review and install either the current Omnitrace release or earlier releases, use these links: -* Latest Omnitrace Release: ``_ -* All Omnitrace Releases: ``_ +* Latest Omnitrace Release: ``_ +* All Omnitrace Releases: ``_ Operating system support ======================================== @@ -39,7 +39,7 @@ Other OS distributions might function but are not supported or tested. Identifying the operating system ----------------------------------- -If you are unsure of the operating system and version, the ``/etc/os-release`` and +If you are unsure of the operating system and version, the ``/etc/os-release`` and ``/usr/lib/os-release`` files contain operating system identification data for Linux systems. .. code-block:: shell @@ -84,8 +84,8 @@ For example, ... omnitrace-1.0.0-ubuntu-20.04-ROCm-50000-OMPT-PAPI-Python3.sh -Any of the ``EXTRA`` fields with a CMake build option -(for example, PAPI, as referenced in a following section) or +Any of the ``EXTRA`` fields with a CMake build option +(for example, PAPI, as referenced in a following section) or with no link requirements (such as OMPT) have self-contained support for these packages. @@ -113,17 +113,17 @@ Installing Omnitrace from source ======================================== Omnitrace needs a GCC compiler with full support for C++17 and CMake v3.16 or higher. -The Clang compiler may be used in lieu of the GCC compiler if `Dyninst `_ +The Clang compiler may be used in lieu of the GCC compiler if `Dyninst `_ is already installed. Build requirements ----------------------------------- * GCC compiler v7+ - + * Older GCC compilers may be supported but are not tested * Clang compilers are generally supported for Omnitrace but not Dyninst - + * `CMake `_ v3.16+ .. 
note:: @@ -139,7 +139,7 @@ Build requirements Required third-party packages ----------------------------------- -* `Dyninst `_ for dynamic or static instrumentation. +* `Dyninst `_ for dynamic or static instrumentation. Dyninst uses the following required and optional components. * `TBB `_ (required) @@ -155,7 +155,7 @@ during the Omnitrace build. The following list indicates the package, the versio the application that requires the package (for example, Omnitrace requires Dyninst while Dyninst requires TBB), and the CMake option to build the package alongside Omnitrace: -.. csv-table:: +.. csv-table:: :header: "Third-Party Library", "Minimum Version", "Required By", "CMake Option" :widths: 15, 10, 12, 40 @@ -182,13 +182,13 @@ Optional third-party packages * ``OMNITRACE_USE_MPI`` enables full MPI support * ``OMNITRACE_USE_MPI_HEADERS`` enables wrapping of the dynamically-linked MPI C function calls. - (By default, if Omnitrace cannot find an OpenMPI MPI distribution, it uses a local copy + (By default, if Omnitrace cannot find an OpenMPI MPI distribution, it uses a local copy of the OpenMPI ``mpi.h``.) -* Several optional third-party profiling tools supported by Timemory +* Several optional third-party profiling tools supported by Timemory (for example, `Caliper `_, `TAU `_, CrayPAT, and others) -.. csv-table:: +.. csv-table:: :header: "Third-Party Library", "CMake Enable Option", "CMake Build Option" :widths: 15, 45, 40 @@ -204,10 +204,10 @@ The easiest way to install Dyninst is alongside Omnitrace, but it can also be in Building Dyninst alongside Omnitrace ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To install Dyninst alongside Omnitrace, configure Omnitrace with ``OMNITRACE_BUILD_DYNINST=ON``. +To install Dyninst alongside Omnitrace, configure Omnitrace with ``OMNITRACE_BUILD_DYNINST=ON``. 
Depending on the version of Ubuntu, the ``apt`` package manager might have current enough -versions of the Dyninst Boost, TBB, and LibIberty dependencies -(use ``apt-get install libtbb-dev libiberty-dev libboost-dev``). +versions of the Dyninst Boost, TBB, and LibIberty dependencies +(use ``apt-get install libtbb-dev libiberty-dev libboost-dev``). However, it is possible to request Dyninst to install its dependencies via ``DYNINST_BUILD_=ON``, as follows: @@ -216,7 +216,7 @@ its dependencies via ``DYNINST_BUILD_=ON``, as follows: git clone https://github.com/ROCm/omnitrace.git omnitrace-source cmake -B omnitrace-build -DOMNITRACE_BUILD_DYNINST=ON -DDYNINST_BUILD_{TBB,ELFUTILS,BOOST,LIBIBERTY}=ON omnitrace-source -where ``-DDYNINST_BUILD_{TBB,BOOST,ELFUTILS,LIBIBERTY}=ON`` is expanded by +where ``-DDYNINST_BUILD_{TBB,BOOST,ELFUTILS,LIBIBERTY}=ON`` is expanded by the shell to ``-DDYNINST_BUILD_TBB=ON -DDYNINST_BUILD_BOOST=ON ...`` Installing Dyninst via Spack @@ -237,19 +237,24 @@ Installing Dyninst via Spack Installing Omnitrace ----------------------------------- -Omnitrace has CMake configuration options for MPI support (``OMNITRACE_USE_MPI`` or -``OMNITRACE_USE_MPI_HEADERS``), HIP kernel tracing (``OMNITRACE_USE_ROCTRACER``), -ROCm device sampling (``OMNITRACE_USE_ROCM_SMI``), OpenMP-Tools (``OMNITRACE_USE_OMPT``), +Omnitrace has CMake configuration options for MPI support (``OMNITRACE_USE_MPI`` or +``OMNITRACE_USE_MPI_HEADERS``), HIP kernel tracing (``OMNITRACE_USE_ROCTRACER``), +ROCm device sampling (``OMNITRACE_USE_ROCM_SMI``), OpenMP-Tools (``OMNITRACE_USE_OMPT``), hardware counters via PAPI (``OMNITRACE_USE_PAPI``), among other features. -Various additional features can be enabled via the +Various additional features can be enabled via the ``TIMEMORY_USE_*`` `CMake options `_. 
-Any ``OMNITRACE_USE_`` option which has a corresponding ``TIMEMORY_USE_`` +Any ``OMNITRACE_USE_`` option which has a corresponding ``TIMEMORY_USE_`` option means that the Timemory support for this feature has been integrated -into Perfetto support for Omnitrace, for example, ``OMNITRACE_USE_PAPI=`` also configures +into Perfetto support for Omnitrace, for example, ``OMNITRACE_USE_PAPI=`` also configures ``TIMEMORY_USE_PAPI=``. This means the data that Timemory is able to collect via this package -is passed along to Perfetto and is displayed when the ``.proto`` file is visualized +is passed along to Perfetto and is displayed when the ``.proto`` file is visualized in `the Perfetto UI `_. +.. important:: + Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. + If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, + then try using `Perfetto UI v46.0 `_. + .. code-block:: shell git clone https://github.com/ROCm/omnitrace.git omnitrace-source @@ -280,26 +285,26 @@ MPI support within Omnitrace ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Omnitrace can have full (``OMNITRACE_USE_MPI=ON``) or partial (``OMNITRACE_USE_MPI_HEADERS=ON``) MPI support. -The only difference between these two modes is whether or not the results collected +The only difference between these two modes is whether or not the results collected via Timemory and/or Perfetto can be aggregated into a single -output file during finalization. When full MPI support is enabled, combining the +output file during finalization. When full MPI support is enabled, combining the Timemory results always occurs, whereas combining the Perfetto results is configurable via the ``OMNITRACE_PERFETTO_COMBINE_TRACES`` setting. 
-The primary benefits of partial or full MPI support are the automatic wrapping +The primary benefits of partial or full MPI support are the automatic wrapping of MPI functions and the ability -to label output with suffixes which correspond to the ``MPI_COMM_WORLD`` rank ID +to label output with suffixes which correspond to the ``MPI_COMM_WORLD`` rank ID instead of having to use the system process identifier (i.e. ``PID``). -In general, it's recommended to use partial MPI support with the OpenMPI +In general, it's recommended to use partial MPI support with the OpenMPI headers as this is the most portable configuration. -If full MPI support is selected, make sure your target application is built +If full MPI support is selected, make sure your target application is built against the same MPI distribution as Omnitrace. For example, do not build Omnitrace with MPICH and use it on a target application built against OpenMPI. If partial support is selected, the reason the OpenMPI headers are recommended instead of the MPICH headers is -because the ``MPI_COMM_WORLD`` in OpenMPI is a pointer to ``ompi_communicator_t`` (8 bytes), -whereas ``MPI_COMM_WORLD`` in MPICH is an ``int`` (4 bytes). Building Omnitrace with partial MPI support +because the ``MPI_COMM_WORLD`` in OpenMPI is a pointer to ``ompi_communicator_t`` (8 bytes), +whereas ``MPI_COMM_WORLD`` in MPICH is an ``int`` (4 bytes). Building Omnitrace with partial MPI support and the MPICH headers and then using -Omnitrace on an application built against OpenMPI causes a segmentation fault. +Omnitrace on an application built against OpenMPI causes a segmentation fault. This happens because the value of the ``MPI_COMM_WORLD`` is truncated during the function wrapping before being passed along to the underlying MPI function. 
@@ -330,7 +335,7 @@ Alternatively, you can directly source the ``setup-env.sh`` script: Test the executables ----------------------------------- -Successful execution of these commands confirms that the installation does not have any +Successful execution of these commands confirms that the installation does not have any issues locating the installed libraries: .. code-block:: shell @@ -353,7 +358,7 @@ This section explains how to resolve certain issues that might happen when you f Issues with RHEL and SELinux ---------------------------------------------------- -RHEL (Red Hat Enterprise Linux) and related distributions of Linux automatically enable a security feature +RHEL (Red Hat Enterprise Linux) and related distributions of Linux automatically enable a security feature named SELinux (Security-Enhanced Linux) that prevents Omnitrace from running. This issue applies to any Linux distribution with SELinux installed, including RHEL, CentOS, Fedora, and Rocky Linux. The problem can happen with any GPU, or even without a GPU. @@ -367,7 +372,7 @@ run ``omnitrace-run`` with the instrumented program. omnitrace-instrument -M sampling -o hello.instr -- ./hello omnitrace-run -- ./hello.instr -Instead of successfully running the binary with call-stack sampling, +Instead of successfully running the binary with call-stack sampling, Omnitrace crashes with a segmentation fault. .. note:: @@ -375,10 +380,10 @@ Omnitrace crashes with a segmentation fault. If you are physically logged in on the system (not using SSH or a remote connection), the operating system might display an SELinux pop-up warning in the notifications. -To workaround this problem, either disable SELinux or configure it to use a more +To work around this problem, either disable SELinux or configure it to use a more permissive setting. -To avoid this problem for the duration of the current session, run this command +To avoid this problem for the duration of the current session, run this command from the shell: ..
code-block:: shell @@ -386,25 +391,25 @@ from the shell: sudo setenforce 0 For a permanent workaround, edit the SELinux configuration file using the command -``sudo vim /etc/sysconfig/selinux`` and change the ``SELINUX`` setting to +``sudo vim /etc/sysconfig/selinux`` and change the ``SELINUX`` setting to either ``Permissive`` or ``Disabled``. .. note:: - Permanently changing the SELinux settings can have security implications. + Permanently changing the SELinux settings can have security implications. Ensure you review your system security settings before making any changes. Modifying RPATH details ---------------------------------------------------- -If you're experiencing problems loading your application with an instrumented library, -then you might have to check and modify the RPATH specified in your application. +If you're experiencing problems loading your application with an instrumented library, +then you might have to check and modify the RPATH specified in your application. See the section on `troubleshooting RPATHs <../how-to/instrumenting-rewriting-binary-application.html#rpath-troubleshooting>`_ for further details. Configuring PAPI to collect hardware counters ---------------------------------------------------- -To use PAPI to collect the majority of hardware counters, ensure -the ``/proc/sys/kernel/perf_event_paranoid`` setting has a value less than or equal to ``2``. +To use PAPI to collect the majority of hardware counters, ensure +the ``/proc/sys/kernel/perf_event_paranoid`` setting has a value less than or equal to ``2``. For more information, see the :ref:`omnitrace_papi_events` section. 
\ No newline at end of file diff --git a/docs/reference/development-guide.rst b/docs/reference/development-guide.rst index d04338ede..d2140199c 100644 --- a/docs/reference/development-guide.rst +++ b/docs/reference/development-guide.rst @@ -25,7 +25,7 @@ The ``main`` routine of ``omnitrace-avail`` has three important sections: * Printing hardware counters omnitrace-sample: `source/bin/omnitrace-sample `_ -------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``omnitrace-sample -- `` * Translates command-line options into environment variables @@ -33,7 +33,7 @@ omnitrace-sample: `source/bin/omnitrace-sample `` and a modified environment omnitrace-causal: `source/bin/omnitrace-causal `_ -------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------------------- When there is exactly one causal profiling configuration variant (which enables debugging), ``omnitrace-causal`` has a nearly identical design to ``omnitrace-sample`` @@ -46,7 +46,7 @@ the following actions take place for each variant: * the parent process waits for the child process to finish omnitrace-instrument: `source/bin/omnitrace-instrument `_ -------------------------------------------------------------------------------------------------------------------------------------------- +---------------------------------------------------------------------------------------------------------------------------------------------- * Requires a command-line format of ``omnitrace-instrument -- `` * Allows the user to provide options specifying whether to perform runtime instrumentation, use
binary rewrite, or @@ -409,4 +409,4 @@ to this sequence: Eventually, the goal is to migrate all subsets of data collection which currently support more rudimentary models of time window constraints, such as process sampling and causal profiling, -to this model. \ No newline at end of file +to this model. diff --git a/docs/what-is-omnitrace.rst b/docs/what-is-omnitrace.rst index e2112688e..4ad340b8c 100644 --- a/docs/what-is-omnitrace.rst +++ b/docs/what-is-omnitrace.rst @@ -12,13 +12,18 @@ instrumentation, call-stack sampling, and various other features for determining which function and line number are currently executing. A visualization of the comprehensive Omnitrace results can be observed in any modern -web browser. Upload the Perfetto (``.proto``) output files produced by Omnitrace at +web browser. Upload the Perfetto (``.proto``) output files produced by Omnitrace at `ui.perfetto.dev `_ to see the details. -Aggregated high-level results are available as human-readable text files and -JSON files for programmatic analysis. The JSON output files are compatible with the +.. important:: + Perfetto validation is done with trace_processor v46.0 as there is a known issue with v47.0. + If you are experiencing problems viewing your trace in the latest version of `Perfetto `_, + then try using `Perfetto UI v46.0 `_. + +Aggregated high-level results are available as human-readable text files and +JSON files for programmatic analysis. The JSON output files are compatible with the `hatchet `_ Python package. Hatchet converts -the performance data into pandas data frames and facilitates multi-run comparisons, filtering, +the performance data into pandas data frames and facilitates multi-run comparisons, filtering, and visualization in Jupyter notebooks. To use Omnitrace for instrumentation, follow these two configuration steps: