From 7cb62dce864104f3abc2cb06b3568f44c658cca4 Mon Sep 17 00:00:00 2001
From: PhilipDeegan
Date: Sat, 10 Aug 2024 17:42:53 +0200
Subject: [PATCH] phlop runtime process monitoring

Add pyphare.simulator.monitoring, an optional wrapper around phlop's
AttachableRuntimeStatsManager. When phlop is importable, Simulator.run()
starts the stats manager after the dry-run check and shuts it down just
before `return self.reset()`. Per-rank output is written to
.phare/stats/rank.<mpi_rank>.yaml. When phlop is not importable the
monitoring entry points return without doing anything.

Also:
- Simulator.run() gains a `monitoring=True` keyword argument.
- Simulator.initialize() gets a blank line before `return self`.
- Simulator._throw() no longer imports cpp_lib.
- Simulator::advance no longer opens a PHARE_LOG_SCOPE.
- phloping.py no longer imports hierarchy_from.
- phloping.normalised_times_for_L() drops the first particle count of a
  level when the number of counts differs from the number of times.
---
Review notes (this section is ignored by `git am`):

- Line breaks and indentation of this patch were reconstructed from the
  hunk headers; blank context lines are placed so that every hunk's
  old/new line counts match its header.
- valdict() used to return the `dict` type itself (and drop its kwargs)
  when phlop is missing; it now returns `dict(**kwargs)`.
- `Li_times` in normalised_times_for_L() holds particle counts, so it is
  renamed `Li_particles`.
- The `index` blob hashes below are those of the original patch and do
  not reflect the two edits above.
- NOTE(review): `times` appears to hold one entry per substep (it is
  reshaped to (n, substeps)), so for ilvl > 0 `counts == len(times)`
  looks like it can never hold when substeps > 1 and the first count
  would always be trimmed - confirm against a run without an init dump.

 pyphare/pyphare/simulator/monitoring.py | 48 +++++++++++++++++++++++++
 pyphare/pyphare/simulator/simulator.py  | 10 ++++--
 src/simulator/simulator.hpp             |  2 --
 tools/python3/phloping.py               | 16 ++++++---
 4 files changed, 67 insertions(+), 9 deletions(-)
 create mode 100644 pyphare/pyphare/simulator/monitoring.py

diff --git a/pyphare/pyphare/simulator/monitoring.py b/pyphare/pyphare/simulator/monitoring.py
new file mode 100644
index 000000000..fb2703e01
--- /dev/null
+++ b/pyphare/pyphare/simulator/monitoring.py
@@ -0,0 +1,48 @@
+from pathlib import Path
+
+
+def have_phlop():
+    from importlib.util import find_spec
+
+    try:
+        return find_spec("phlop.dict") is not None
+    except (ImportError, ModuleNotFoundError):
+        return False
+
+
+def valdict(**kwargs):
+    if not have_phlop():
+        return dict(**kwargs)  # plain-dict stand-in, keeps the kwargs
+
+    from phlop.dict import ValDict  # pylint: disable=import-error
+
+    return ValDict(**kwargs)
+
+
+_globals = valdict(stats_man=None)
+
+
+def monitoring_yaml_file(cpplib):
+    path = Path(".phare") / "stats" / f"rank.{cpplib.mpi_rank()}.yaml"
+    path.parent.mkdir(exist_ok=True, parents=True)
+    return path
+
+
+def setup_monitoring(cpplib, interval=10):
+    if not have_phlop():
+        return
+
+    from phlop.app import stats_man as sm  # pylint: disable=import-error
+
+    _globals.stats_man = sm.AttachableRuntimeStatsManager(
+        valdict(yaml=monitoring_yaml_file(cpplib), interval=interval),
+        dict(rank=cpplib.mpi_rank()),
+    ).start()
+
+
+def monitoring_shutdown(cpplib):
+    if not have_phlop():
+        return
+
+    if _globals.stats_man:
+        _globals.stats_man.kill().join()
diff --git a/pyphare/pyphare/simulator/simulator.py b/pyphare/pyphare/simulator/simulator.py
index 27698ed7a..57d206b5a 100644
--- a/pyphare/pyphare/simulator/simulator.py
+++ b/pyphare/pyphare/simulator/simulator.py
@@ -7,6 +7,7 @@ import time as timem
 
 import numpy as np
 import pyphare.pharein as ph
+from . import monitoring as mon
 
 
 life_cycles = {}
@@ -127,6 +128,7 @@ def initialize(self):
 
             self.cpp_sim.initialize()
             self._auto_dump()  # first dump might be before first advance
+
             return self
         except:
             import sys
@@ -140,7 +142,6 @@ def initialize(self):
 
     def _throw(self, e):
         import sys
-        from pyphare.cpp import cpp_lib
 
         print_rank0(e)
         sys.exit(1)
@@ -170,12 +171,16 @@ def times(self):
             self.timeStep(),
         )
 
-    def run(self, plot_times=False):
+    def run(self, plot_times=False, monitoring=True):
+        """monitoring requires phlop"""
         from pyphare.cpp import cpp_lib
 
         self._check_init()
+
         if self.simulation.dry_run:
             return self
+        if monitoring:
+            mon.setup_monitoring(cpp_lib())
         perf = []
         end_time = self.cpp_sim.endTime()
         t = self.cpp_sim.currentTime()
@@ -197,6 +202,7 @@ def run(self, plot_times=False):
         if plot_times:
             plot_timestep_time(perf)
 
+        mon.monitoring_shutdown(cpp_lib())
         return self.reset()
 
     def _auto_dump(self):
diff --git a/src/simulator/simulator.hpp b/src/simulator/simulator.hpp
index a0fb352af..c333946c8 100644
--- a/src/simulator/simulator.hpp
+++ b/src/simulator/simulator.hpp
@@ -414,8 +414,6 @@ double Simulator<_dimension, _interp_order, _nbRefinedPart>::advance(double dt)
 
     try
     {
-        PHARE_LOG_SCOPE(1, "Simulator::advance");
-
         dt_new       = integrator_->advance(dt);
         currentTime_ = startTime_ + ((*timeStamper) += dt);
     }
diff --git a/tools/python3/phloping.py b/tools/python3/phloping.py
index 6a80fa65b..0b616677b 100644
--- a/tools/python3/phloping.py
+++ b/tools/python3/phloping.py
@@ -6,7 +6,6 @@ from dataclasses import dataclass, field
 
 
 from pyphare.pharesee.run import Run
-from pyphare.pharesee.hierarchy import hierarchy_from
 
 from phlop.timing.scope_timer import ScopeTimerFile as phScopeTimerFile
 from phlop.timing.scope_timer import file_parser as phfile_parser
@@ -124,17 +123,24 @@ def normalised_times_for_L(self, ilvl):
         """
         Normalise substep time against particle count for that level
           at the most recent coarse time, no refined timesteps
+        Particle counts may include the init dump, and so be one entry longer.
         """
         times = self.advance_times_for_L(ilvl)
+        counts = len(self.particles_per_level_per_time_step[ilvl])
+
+        # trim init particle count for lvl
+        Li_particles = (
+            self.particles_per_level_per_time_step[ilvl]
+            if counts == len(times)
+            else self.particles_per_level_per_time_step[ilvl][1:]
+        )
         if ilvl == 0:
-            return times / self.particles_per_level_per_time_step[0]
+            return times / Li_particles
         substeps = self.steps_per_coarse_timestep_for_L(ilvl)
         norm_times = times.copy()
         return (
             norm_times.reshape(int(times.shape[0] / substeps), substeps)
-            / self.particles_per_level_per_time_step[ilvl].reshape(
-                self.particles_per_level_per_time_step[ilvl].shape[0], 1
-            )
+            / Li_particles.reshape(Li_particles.shape[0], 1)
         ).reshape(times.shape[0])
 
 