diff --git a/CHANGES.next.md b/CHANGES.next.md index 74624d26d5..1481579055 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -85,6 +85,7 @@ - Add dpb_sparksql_serverless_benchmark, which submits one job for each TPC-DS/H query and measures the whole job execution time, instead of only the query run time. + Add Intel MPI benchmark. ### Enhancements: diff --git a/perfkitbenchmarker/linux_benchmarks/mpi_benchmark.py b/perfkitbenchmarker/linux_benchmarks/mpi_benchmark.py new file mode 100644 index 0000000000..c809fc42fa --- /dev/null +++ b/perfkitbenchmarker/linux_benchmarks/mpi_benchmark.py @@ -0,0 +1,427 @@ +"""MPI benchmarking tests. + +This could go to the public PKB once we have a handle on the metrics and if +there should be tuning on each of the clouds +""" + +import logging +from typing import Any, Dict, Iterator, List, Tuple +from absl import flags + +from perfkitbenchmarker import benchmark_spec +from perfkitbenchmarker import configs +from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import linux_virtual_machine +from perfkitbenchmarker import sample +from perfkitbenchmarker import vm_util + +from perfkitbenchmarker.linux_packages import mpi + +_BaseLinuxVirtualMachine = linux_virtual_machine.BaseLinuxVirtualMachine + +# documents the individual MPI tests in each suite +_MPI_SUITE_TESTS = { + 'IMB-MPI1': [ + 'Allgather', 'Allgatherv', 'Allreduce', 'Alltoall', 'Alltoallv', + 'Barrier', 'Bcast', 'Exchange', 'Gather', 'Gatherv', 'PingPing', + 'PingPong', 'Reduce', 'Reduce_scatter', 'Reduce_scatter_block', + 'Scatter', 'Scatterv', 'Sendrecv' + ], + 'IMB-MPI2': [], + 'IMB-NBC': [ + 'Iallgather', 'Iallgatherv', 'Iallreduce', 'Ialltoall', 'Ialltoallv', + 'Ibarrier', 'Ibcast', 'Igather', 'Igatherv', 'Ireduce', + 'Ireduce_scatter', 'Iscatter', 'Iscatterv' + ], + 'IMB-RMA': [ + 'Accumulate', 'All_get_all', 'All_put_all', 'Bidir_get', 'Bidir_put', + 'Compare_and_swap', 'Exchange_get', 'Exchange_put', 'Fetch_and_op', + 'Get_accumulate', 'One_get_all', 'One_put_all', 'Put_all_local', + 'Put_local', 'Truly_passive_put', 'Unidir_get', 'Unidir_put' + ], + 'IMB-MT': [ + 'AllReduceMT', 'BarrierMT', 'BcastMT', 'BiBandMT', 'ExchangeMT', + 'PingPingMT', 'PingPongMT', 'ReduceMT', 'SendRecvMT', 'UniBandMT' + ] +} + +flags.DEFINE_list('mpi_suites', ['IMB-MPI1'], + 'MPI benchmarks suites: {}.'.format(sorted(_MPI_SUITE_TESTS))) +_BENCHMARKS = flags.DEFINE_list( + 'mpi_benchmarks', [], + ('List of MPI benchmarks. Default is [], which means ' + 'running all benchmarks in the suite.')) +flag_util.DEFINE_integerlist( + 'mpi_threads', [0, 1], 'Number of MPI processes to use per host. For 0 ' + 'use half the number of vCPUs.') +flags.DEFINE_integer('mpi_timeout', 60, 'MPI testing timeout (seconds).') +flags.DEFINE_integer( + 'mpi_iterations', 100000, + 'Number of times to run an individual benchmark for a given byte size.') +flags.DEFINE_bool('mpi_include_zero_byte', False, + 'Whether to include a 0 byte payload in runs.') +_MSG_SIZES = flags.DEFINE_multi_integer( + 'mpi_msglog_sizes', [], ('List of 2^n byte sizes to use. ' + 'Example: [2,8] will use 4 and 64 byte payloads.')) +_MSG_SIZE_MIN = flags.DEFINE_integer('mpi_msglog_min', 10, + '2^n byte message min size.') +_MSG_SIZE_MAX = flags.DEFINE_integer('mpi_msglog_max', 11, + '2^n byte message max size.') +flags.DEFINE_integer( + 'mpi_off_cache_size', -1, + 'Avoids cache-size (use --mpi_off_cache_size= to reuse ' + 'cache, but that gives unrealistic numbers. 
-1 uses the '
+    'value in IMB_mem_info.h.')
+flags.DEFINE_integer('mpi_off_cache_line_size', None,
+                     'Size of a last level cache line.')
+# For more info on how --mpi_ppn changes the MPI rank assignment, see
+# https://software.intel.com/en-us/articles/controlling-process-placement-with-the-intel-mpi-library
+flags.DEFINE_integer(
+    'mpi_ppn', 0, 'Processes/Ranks per node. Defaults to not setting a ppn '
+    'when running tests, instead relying on -map to place threads.')
+
+flags.DEFINE_list(
+    'mpi_env', ['I_MPI_DEBUG=6'],
+    'Comma-separated list of environment variables, e.g. '
+    '--mpi_env=FI_PROVIDER=tcp,FI_LOG_LEVEL=info. '
+    'The default is set to output MPI pinning debugging information.')
+flags.DEFINE_list(
+    'mpi_genv', [], 'Comma-separated list of global environment variables, '
+    'i.e. environment variables to be applied to all nodes, e.g. '
+    '--mpi_genv=I_MPI_PIN_PROCESSOR_LIST=0,I_MPI_PIN=1')
+flags.DEFINE_bool('mpi_record_latency', True,
+                  'Whether to record the individual packet latencies.')
+flags.DEFINE_integer(
+    'mpi_npmin', None, 'Minimum number of processes to use. For IMB, this '
+    'becomes -npmin. If unspecified, no attempt will be made to specify the '
+    'minimum number of processes (i.e. the application defaults will prevail).')
+flags.DEFINE_bool(
+    'mpi_tune', False,
+    'Whether to instruct the mpirun command to use data collected by an MPI '
+    'tuning utility like mpitune, e.g. by passing -tune to mpirun. Consider '
+    'using in conjunction with specifying the tuning data directory, e.g. for '
+    'Intel MPI setting I_MPI_TUNER_DATA_DIR.')
+flags.DEFINE_bool(
+    'mpi_multi', True,
+    'Whether to instruct the mpirun command to set -multi and run with '
+    'multiple groups as opposed to just one.')
+
+FLAGS = flags.FLAGS
+
+BENCHMARK_NAME = 'mpi'
+
+BENCHMARK_CONFIG = """
+mpi:
+  description: Runs the MPI benchmarks
+  vm_groups:
+    default:
+      vm_count: 2
+      vm_spec:
+        GCP:
+          machine_type: n1-standard-4
+          zone: us-west1-a
+        AWS:
+          machine_type: c5.xlarge
+          zone: us-west-1c
+        Azure:
+          machine_type: Standard_B2s
+          zone: eastus
+"""
+
+# These columns in the MPI output data are surfaced as sample.Sample metrics.
+_METRIC_NAMES = frozenset(['time_avg', 'time_overall'])
+
+flags.register_validator(
+    'mpi_suites',
+    lambda suites: set(suites) <= set(_MPI_SUITE_TESTS),
+    message='--mpi_suites values must be in {}'.format(
+        sorted(_MPI_SUITE_TESTS.keys())))
+
+flags.register_validator(
+    'mpi_env',
+    lambda env_params: all('=' in param for param in env_params),
+    message='--mpi_env values must be in format "key=value" or "key="')
+
+flags.register_validator(
+    'mpi_genv',
+    lambda genv_params: all('=' in param for param in genv_params),
+    message='--mpi_genv values must be in format "key=value" or "key="')
+
+
+def GetConfig(user_config: Dict[str, Any]) -> Dict[str, Any]:
+  """Returns the benchmark config to use.
+
+  Args:
+    user_config: Pre-defined config.
+
+  Raises:
+    InvalidFlagConfigurationError: If user-supplied flags are incorrect.
+  """
+  if _MSG_SIZES.value:
+    if FLAGS['mpi_msglog_min'].present or FLAGS['mpi_msglog_max'].present:
+      raise errors.Setup.InvalidFlagConfigurationError(
+          'If --mpi_msglog_sizes is set, cannot set '
+          '--mpi_msglog_min or --mpi_msglog_max')
+  if _BENCHMARKS.value:
+    all_tests = set()
+    for tests in _MPI_SUITE_TESTS.values():
+      all_tests.update(_LowerList(tests))
+    unknown_tests = set(_LowerList(_BENCHMARKS.value)).difference(all_tests)
+    if unknown_tests:
+      raise errors.Setup.InvalidFlagConfigurationError(
+          f'Unknown MPI benchmarks: "{",".join(sorted(unknown_tests))}"')
+  config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
+  if FLAGS['num_vms'].present:
+    config['vm_groups']['default']['vm_count'] = FLAGS.num_vms
+  return config
+
+
+def Prepare(spec: benchmark_spec.BenchmarkSpec) -> None:
+  """Installs the MPI package on all VMs and verifies the install.
+
+  Args:
+    spec: The benchmark spec.
+  """
+  vms = spec.vms
+  vm_util.RunThreaded(lambda vm: vm.AuthenticateVm(), vms)
+  logging.info('Installing mpi package')
+  vm_util.RunThreaded(lambda vm: vm.Install('mpi'), vms)
+  mpi.VerifyInstall(vms)
+
+
+def Run(spec: benchmark_spec.BenchmarkSpec) -> List[sample.Sample]:
+  """Runs all of the MPI tests.
+
+  Args:
+    spec: The benchmark spec.
+
+  Returns:
+    List of sample.Samples.
+  """
+  vms = spec.vms
+  # For --mpi_threads=0 the number of threads per host is half the vCPUs.
+  samples = []
+  # The count of real CPUs on the VM: for SMT runs (the default) it is one half
+  # of the number of vCPUs. When SMT is disabled it is the count of the CPUs.
+  real_cpus = vms[0].NumCpusForBenchmark(True)
+  for process_count in FLAGS.mpi_threads:
+    process_count = process_count or real_cpus
+    # Indicates whether the run is using the optimal HPC configuration of one
+    # thread per real CPU (1/2 of the vCPUs).
+    on_real_cpus = process_count == real_cpus
+    samples.extend(
+        _RunTest(vms,
+                 process_count * len(vms),  # this is the number of ranks
+                 FLAGS.mpi_ppn, on_real_cpus))
+  for item in samples:
+    # TODO(user) reenable installing MKL when Intel repos work
+    # google3/cloud/performance/artemis/internal_packages/internal_intelmpi.py;l=65
+    item.metadata['installed_mkl'] = False
+  return samples
+
+
+def _RunTest(vms: List[_BaseLinuxVirtualMachine], total_processes: int,
+             ppn: int, on_real_cpus: bool) -> List[sample.Sample]:
+  """Runs the MPI tests for the given number of processes per host.
+
+  Args:
+    vms: List of virtual machines to use in the test.
+    total_processes: The total number of processes to run across all nodes.
+    ppn: Processes per node.
+    on_real_cpus: Whether the number of MPI processes is equal to the number of
+      real CPUs (vCPUs / 2).
+
+  Returns:
+    List of sample.Samples.
+  """
+  # metadata that's constant for all runs
+  samples = []
+  for suite in FLAGS.mpi_suites:
+    for request in _CreateRequestWithFlagParameters(
+        vms=vms,
+        total_processes=total_processes,
+        suite=suite,
+        tests=_GetTests(suite),
+        ppn=ppn):
+      response = mpi.RunMpiStats(vms[0], request)
+      for item in _CreateSamples(response):
+        item.metadata['mpi_suite'] = suite
+        samples.append(item)
+  # Fill in metadata common to all samples.
+ hosts = [vm.internal_ip for vm in vms] + for item in samples: + item.metadata.update({ + 'compile_from_source': FLAGS.imb_compile_from_source, + 'threads_half_cpus': on_real_cpus, + 'smt_enabled': vms[0].IsSmtEnabled(), + 'threads': total_processes, + 'number_nodes': len(hosts), + 'nodes': str(','.join(sorted(hosts))), + 'processes_per_host': total_processes // len(hosts), + 'ppn': ppn, + 'mpi_env': ','.join(sorted(FLAGS.mpi_env + FLAGS.mpi_genv)), + 'tune': FLAGS.mpi_tune, + }) + for mpi_item in FLAGS.mpi_env + FLAGS.mpi_genv: + key, value = mpi_item.split('=', 1) + item.metadata['mpi_env_' + key] = value + return samples + + +def _CreateSamples(response: mpi.MpiResponse) -> Iterator[sample.Sample]: + """Generates samples for each result in the response.""" + for result in response.results: + for row in result.data: + for item in _MpiDataToSamples(row): + item.metadata.update({ + 'mpi_run': response.mpi_run, + 'mpi_args': response.args, + 'mpi_vendor': response.vendor, + 'mpi_version': response.version, + 'mpi_benchmark': result.benchmark, + }) + if result.groups is not None: + item.metadata['mpi_groups'] = result.groups + if result.processes_per_group is not None: + item.metadata['mpi_processes_per_group'] = result.processes_per_group + if result.groups is not None: + item.metadata[ + 'mpi_ranks'] = result.processes_per_group * result.groups + else: # only one group => ranks = ppg + item.metadata['mpi_ranks'] = result.processes_per_group + if result.mode: + item.metadata['mpi_mode'] = result.mode + if result.group_layout: + # Convert {0: [1,2], 1: [3,4]} into '0=1,2;1=3,4' + layout = [] + for group_number, cpu_ids in sorted(result.group_layout.items()): + layout.append(f'{group_number}=' + f'{",".join(str(cpu) for cpu in cpu_ids)}') + item.metadata['mpi_layout'] = ';'.join(layout) + else: + item.metadata['mpi_layout'] = None + if response.mpi_pinning: + item.metadata['mpi_pinning'] = ';'.join(response.mpi_pinning) + if response.mpi_env: + mpi_env = sorted(response.mpi_env.items()) + item.metadata['mpi_running_env'] = ';'.join( + f'{key}={value}' for key, value in mpi_env) + yield item + + +def _MpiDataToSamples(row: mpi.MpiData) -> List[sample.Sample]: + """Returns the individual MPI result row as a list of Samples. + + MpiData stores the results of a run for a given benchmark ("PingPong") that + specifies the: + bytes=(integer payload byte size for the run) + is_error=(whether this run was timed out) + and has one or both of the following if the run did not time out: + data={dict of latency percentages : latency in usec} + histogram={dict of latency in usec : count of packets} + + This method returns [Samples] as the dict of latencies and the histogram dict + are reported as individual samples. + + Args: + row: A latency/histogram value for a given number of bytes. 
+ """ + if row.is_error: + metadata = {'bytes': row.bytes, 'mpi_timeout': FLAGS.mpi_timeout} + # value=1 so that the timeline chart can show a blip when this happens + return [sample.Sample('timeout_error', 1, 'count', metadata)] + found_metrics = _METRIC_NAMES.intersection(row.data) + if not found_metrics: + logging.warning('Skipping row %s as missing a required metric name %s', row, + _METRIC_NAMES) + return [] + metric = list(found_metrics)[0] + ret = [sample.Sample(metric, row.data[metric], 'usec', row.data)] + if row.histogram: + # change the key of the histogram to a string to match existing TCP_RR data + metadata = { + 'histogram': { + str(latency): count for latency, count in row.histogram.items() + }, + } + ret.append(sample.Sample('MPI_Latency_Histogram', 0, 'usec', metadata)) + for item in ret: + item.metadata.update({ + 'bytes': row.bytes, + 'repetitions': row.repetitions, + }) + return ret + + +def Cleanup(spec: benchmark_spec.BenchmarkSpec) -> None: + del spec # Unused + + +def _CreateRequestWithFlagParameters(vms: List[_BaseLinuxVirtualMachine], + total_processes: int, suite: str, + tests: List[str], + ppn: int) -> Iterator[mpi.MpiRequest]: + """Yields an MpiRequest using settings passed in as flags. + + If told to record MPI latencies (--mpi_record_latency) then must create + individual runs for each byte length. Flags of --mpi_msglog_min=10, + --mpi_msglog_max=12 generates 3 MpiRequests of (msglog_min=10,msglog_max=10), + (msglog_min=11,msglog_max=11), (msglog_min=12,msglog_max=12) + + Args: + vms: List of VMs to run on. + total_processes: The total number of MPI processes to run over all VMs. + suite: The name of the MPI suite to run. + tests: The individual MPI tests to run. An MpiRequest is created for each. + ppn: The Processes Per Node, passed along to mpirun. + """ + msglog_sizes: List[Tuple[int, int]] = [] + if _MSG_SIZES.value: + msglog_sizes = [(size, size) for size in _MSG_SIZES.value] + else: + if FLAGS.mpi_record_latency: + # MUST pass in only one size at a time to the mpirun command + # to get a single dump file for the run + msglog_sizes = [ + (size, size) + for size in range(FLAGS.mpi_msglog_min, FLAGS.mpi_msglog_max + 1) + ] + else: + msglog_sizes = [(FLAGS.mpi_msglog_min, FLAGS.mpi_msglog_max)] + for test in tests: + for msglog_min, msglog_max in msglog_sizes: + yield mpi.MpiRequest( + vms=vms, + total_processes=total_processes, + suite=suite, + tests=[test], + ppn=ppn, + msglog_min=msglog_min, + msglog_max=msglog_max, + timeout=FLAGS.mpi_timeout, + off_cache_size=FLAGS.mpi_off_cache_size, + off_cache_line_size=FLAGS.mpi_off_cache_line_size, + iterations=FLAGS.mpi_iterations, + include_zero_byte=FLAGS.mpi_include_zero_byte, + compile_from_source=FLAGS.imb_compile_from_source, + environment=FLAGS.mpi_env, + global_environment=FLAGS.mpi_genv, + record_latencies=FLAGS.mpi_record_latency, + npmin=FLAGS.mpi_npmin, + tune=FLAGS.mpi_tune, + multi=FLAGS.mpi_multi) + + +def _LowerList(elements: List[str]) -> List[str]: + """Returns the list with all items lowercased.""" + return [item.lower() for item in elements] + + +def _GetTests(suite: str) -> List[str]: + """Returns the tests to run for this benchmark run. + + Args: + suite: The MPI suite to use. + + Returns: + List of individual benchmarks to run. 
+ """ + tests = _BENCHMARKS.value or _MPI_SUITE_TESTS[suite] + all_tests = set(_LowerList(_MPI_SUITE_TESTS[suite])) + return [test for test in tests if test.lower() in all_tests] diff --git a/perfkitbenchmarker/linux_packages/imb.py b/perfkitbenchmarker/linux_packages/imb.py new file mode 100644 index 0000000000..75b9cdfb19 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/imb.py @@ -0,0 +1,209 @@ +r"""Installs MPI library (Intel or OpenMPI) and compiles Intel MPI benchmarks (IMB) from source.""" +import logging +import posixpath +from typing import List, Optional + +from absl import flags +from perfkitbenchmarker.linux_packages import intel_repo +from perfkitbenchmarker.linux_packages import intelmpi + +FLAGS = flags.FLAGS + +COMPILE_FROM_SOURCE = flags.DEFINE_bool( + 'imb_compile_from_source', True, + 'Whether to compile the Intel MPI benchmarks from source.') + +_INTEL_DIR = '/opt/intel' +_INTEL_COMPILER_DIR = posixpath.join(_INTEL_DIR, + 'compilers_and_libraries/linux') +_INTEL_COMPILER_DIR_2020 = posixpath.join(_INTEL_DIR, + 'compilers_and_libraries_2020/linux') + +# TBB: Intel's "Thread Building Blocks" for multithreaded programs +# https://en.wikipedia.org/wiki/Threading_Building_Blocks +_INTEL_FIX_TBBROOT_CMD = ( + "sudo sed -i 's" + "#TBBROOT=SUBSTITUTE_INSTALL_DIR_HERE#TBBROOT={compiler_dir}/tbb#' " + '{compiler_dir}/tbb/bin/tbbvars.sh') + +# Source for the Intel MPI benchmarks +_GITHUB_URL = 'https://github.com/intel/mpi-benchmarks.git' +_GITHUB_COMMIT = '2d752544461f04111efef0926efe46826d90f720' +# Directory for the MPI benchmarks +_MPI_BENCHMARK_DIR = 'mpi-benchmarks' +# Checks out the Intel MPI benchmarks +_GIT_CHECKOUT_CMD = (f'git clone -n {_GITHUB_URL}; cd mpi-benchmarks; ' + f'git checkout {_GITHUB_COMMIT}') + +# Patch file and command to add latency histogram to Intel test code +_PATCH_FILE = 'intelmpi.patch' +_GIT_PATCH_CMD = f'patch -d {_MPI_BENCHMARK_DIR} -p3 < ~/{_PATCH_FILE}' + +# Enable verbose logging when mpirun fails due to a segfault +_ENABLE_VERBOSE_SEGFAULT_LOGS = ('echo 1 | sudo tee -a ' + '/proc/sys/kernel/print-fatal-signals') + + +def _InstallForIntelMpiLibrary( + vm) -> None: + """Compiles the Intel MPI benchmarks for Intel MPI library.""" + if intel_repo.UseOneApi(): + vm.InstallPackages('intel-oneapi-compiler-dpcpp-cpp') + vm.InstallPackages('intel-oneapi-mpi-devel') # for mpi.h + source_cmds = f'. {intel_repo.ONEAPI_VARS_FILE}' + else: + source_cmds = (f'. {_INTEL_DIR}/mkl/bin/mklvars.sh intel64; ' + f'. {_INTEL_COMPILER_DIR}/bin/compilervars.sh intel64') + for compiler_dir in (_INTEL_COMPILER_DIR, _INTEL_COMPILER_DIR_2020): + vm.RemoteCommand( + _INTEL_FIX_TBBROOT_CMD.format(compiler_dir=compiler_dir), + ignore_failure=True) + vm.RemoteCommand(_GIT_CHECKOUT_CMD) + vm.PushDataFile(_PATCH_FILE) + vm.RemoteCommand(_GIT_PATCH_CMD) + # Default make uses the Intel compiler (mpiicc) not available in repos + # {source_cmds} filled in at runtime due to differences in 2018/19 vs 2021 + compile_benchmark_cmd = ( + f'cd {_MPI_BENCHMARK_DIR}; {source_cmds}; CC=mpicc CXX=mpicxx make') + vm.RemoteCommand(compile_benchmark_cmd) + vm.RemoteCommand(_ENABLE_VERBOSE_SEGFAULT_LOGS) + + +def _InstallForOpenMpiLibrary( + vm) -> None: + """Compiles the Intel MPI benchmarks for OpenMPI library.""" + vm.RemoteCommand(_GIT_CHECKOUT_CMD) + vm.PushDataFile(_PATCH_FILE) + vm.RemoteCommand(_GIT_PATCH_CMD) + # When installing OpenMPI, openmpi.py runs ./configure.sh with --prefix=/usr. 
+ compile_benchmark_cmd = ( + f'cd {_MPI_BENCHMARK_DIR}; CC=/usr/bin/mpicc CXX=/usr/bin/mpicxx make') + vm.RemoteCommand(compile_benchmark_cmd) + vm.RemoteCommand(_ENABLE_VERBOSE_SEGFAULT_LOGS) + + +def Install(vm) -> None: + """Installs MPI lib and compiles the Intel MPI benchmarks from source. + + Args: + vm: Virtual machine to run on. + """ + if FLAGS.mpi_vendor == 'intel': + mpilib = 'intelmpi' + install_benchmarks = _InstallForIntelMpiLibrary + elif FLAGS.mpi_vendor == 'openmpi': + if not COMPILE_FROM_SOURCE.value: + raise ValueError( + f'--mpi_vendor=openmpi requires --{COMPILE_FROM_SOURCE.name}') + mpilib = 'openmpi' + install_benchmarks = _InstallForOpenMpiLibrary + + vm.Install(mpilib) + if not COMPILE_FROM_SOURCE.value: + return + logging.info('Installing Intel MPI benchmarks from source') + vm.Install('build_tools') + install_benchmarks(vm) + + +def _MpiRunCommandForIntelMpiLibrary( + vm, hosts: List[str], + total_processes: int, ppn: int, environment: List[str], + global_environment: List[str], tune: bool) -> str: + """String command to call mpirun using Intel MPI library. + + See Intel docs for details: + https://software.intel.com/content/www/us/en/develop/documentation/mpi-developer-guide-linux/top/running-applications/controlling-process-placement.html + + "If the -ppn option is not specified, the process manager assigns as many + processes to the first node as there are physical cores on it. Then the next + node is used." + + If the ppn should not be specified in the command pass in ppn=0. However you + most likely want to pass it in so that the number of processes on each node + is balanced. + + Args: + vm: Virtual machine to run on. + hosts: List of internal IP addresses to run on. + total_processes: The total number of processes to use across all hosts. + ppn: Number of processes per node to use when assigning processes per node. + environment: List of environment variables to set, e.g. "FI_PROVIDER=tcp". + global_environment: List of global environment variables to set via the + '-genv' option to mpirun, e.g. "I_MPI_PIN_PROCESSOR_LIST=0". + tune: Whether to pass -tune. If true, consider setting the + I_MPI_TUNER_DATA_DIR environment variable. + + Returns: + String command to use in a vm.RemoteCommand call. + """ + cmd_elements = [f'{intelmpi.SourceMpiVarsCommand(vm)};'] + cmd_elements.extend(sorted(environment)) + cmd_elements.append('mpirun') + if tune: + cmd_elements.append('-tune') + cmd_elements.extend( + f'-genv {variable}' for variable in sorted(global_environment)) + cmd_elements.append(f'-n {total_processes}') + # hosts MUST remain in same order so that latency file created on first host + hosts_str = ','.join(hosts) + cmd_elements.append(f'-hosts {hosts_str}') + if ppn: + cmd_elements.append(f'-ppn {ppn}') + elif total_processes == len(hosts): + # for single-threaded runs tell MPI to run one thread on each VM + cmd_elements.append('-ppn 1') + return ' '.join(cmd_elements) + + +def _MpiRunCommandForOpenMpiLibrary(hosts: List[str], total_processes: int, + npernode: int, + environment: List[str]) -> str: + """String command to call mpirun using OpenMPI library. + + Args: + hosts: List of internal IP addresses to run on. + total_processes: Translates directly to mpirun's -n option. + npernode: Translates directly to mpirun's -npernode option. If 0, then + -npernode is set to total_processes//len(hosts). + environment: List of envionrment variables to export via mpirun -x. E.g. + "OMPI_MCA_btl=self,tcp" or "OMPI_MCA_rmaps_base_mapping_policy=core:PE=1". 
+ See https://www.open-mpi.org/doc/v3.0/man1/mpirun.1.php for details. + + Returns: + String command to use in a vm.RemoteCommand call. + """ + + cmd_elements = [f'{env_var}' for env_var in environment] + cmd_elements.append('mpirun') + cmd_elements.extend( + [f'-x {env_var.split("=", 1)[0]}' for env_var in environment]) + + # Useful for verifying process mapping. + cmd_elements.append('-report-bindings') + cmd_elements.append('-display-map') + + cmd_elements.append(f'-n {total_processes}') + if not npernode: + npernode = total_processes // len(hosts) + cmd_elements.append(f'-npernode {npernode}') + cmd_elements.append('--use-hwthread-cpus') + # Guarantee that each host has sufficient slots (conservatively). + hosts_str = ','.join([f'{h}:slots={total_processes}' for h in hosts]) + cmd_elements.append(f'-host {hosts_str}') + + return ' '.join(cmd_elements) + + +def MpiRunCommand(vm, + hosts: List[str], total_processes: int, ppn: int, + environment: List[str], global_environment: List[str], + tune: bool) -> Optional[str]: + """String command to call mpirun.""" + if FLAGS.mpi_vendor == 'intel': + return _MpiRunCommandForIntelMpiLibrary(vm, hosts, total_processes, ppn, + environment, global_environment, + tune) + elif FLAGS.mpi_vendor == 'openmpi': + return _MpiRunCommandForOpenMpiLibrary(hosts, total_processes, ppn, + environment) diff --git a/perfkitbenchmarker/linux_packages/mpi.py b/perfkitbenchmarker/linux_packages/mpi.py new file mode 100644 index 0000000000..79677ff360 --- /dev/null +++ b/perfkitbenchmarker/linux_packages/mpi.py @@ -0,0 +1,641 @@ +"""Installs the MPI library and runs the IMB-MPI1 tests. + +Installation of the MPI library is handed off into imb.py, as +compilation of the benchmarks must be done differently depending on the MPI +library being used. + +The run_benchmarks.sh script is copied to the remote server and runs the MPI +tests. The text output is parsed by MpiResultParser. +""" + +import collections +import dataclasses +import logging +import os +import posixpath +import re +from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Tuple, Union +import uuid + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import temp_dir +from perfkitbenchmarker.linux_packages import imb +from perfkitbenchmarker.linux_packages import intelmpi +from perfkitbenchmarker.linux_packages import omb +from perfkitbenchmarker.linux_packages import openmpi + +FLAGS = flags.FLAGS + + +@dataclasses.dataclass +class MpiData: + """Data for an MPI run, including headers. + + A row of the MPI run could be a timeout error or the actual data for a run + stored in the data attribute. A typical value for that is + { + "throughput":10.13, + "time_avg":193.73, + "time_max":202.26, + "time_min":186.85 + } + """ + bytes: Optional[int] = None + repetitions: Optional[int] = None + data: Optional[Dict[str, Union[int, float]]] = None + is_error: bool = False + histogram: Optional[Dict[float, int]] = None + + +@dataclasses.dataclass +class MpiResult: + """Individual runs of a MPI benchmark test. + + For example this could be the PingPong run results. 
+  """
+  benchmark: str
+  data: List[MpiData]
+  groups: Optional[int] = None
+  processes_per_group: Optional[int] = None
+  mode: Optional[str] = None
+  group_layout: Optional[Dict[int, List[int]]] = None
+
+
+@dataclasses.dataclass
+class MpiResponse:
+  """Response to the RunMpiStats call."""
+  mpi_run: str
+  args: str
+  vendor: str
+  version: str
+  results: List[MpiResult]
+  mpi_pinning: List[str]
+  mpi_env: Dict[str, str]
+
+
+@dataclasses.dataclass
+class MpiRequest:
+  """Parameters for running an MPI test.
+
+  See the FLAGS.mpi_XXX definitions in mpi_benchmark.py for details.
+  """
+  # TODO(andytzhu): Get rid of Any, and handle importing linux_virtual_machine
+  # without encountering circular dependencies
+  vms: List[Any]  # virtual machines
+  total_processes: int
+  suite: str
+  tests: List[str]
+  ppn: int
+  msglog_min: int
+  msglog_max: int
+  timeout: int
+  off_cache_size: int
+  off_cache_line_size: Optional[int]
+  iterations: int
+  include_zero_byte: bool
+  compile_from_source: bool
+  environment: List[str] = dataclasses.field(default_factory=list)
+  global_environment: List[str] = dataclasses.field(default_factory=list)
+  record_latencies: bool = False
+  npmin: Optional[int] = None
+  tune: bool = False
+  multi: bool = False
+
+
+# The same order as the output in the print_tail function in the patched code.
+LATENCY_HEADERS: List[str] = [
+    'latency_min', 'latency_p10', 'latency_p25', 'latency_p50', 'latency_p75',
+    'latency_p90', 'latency_p95', 'latency_p99', 'latency_p99.5',
+    'latency_p99.9', 'latency_p99.99', 'latency_max'
+]
+
+# Regexes for parsing I_MPI_DEBUG=4 and higher output.
+_MPI_STARTUP_PREFIX = r'^\[(\d+)\] MPI startup\(\):\s+'
+_MPI_ENV_RE = re.compile(_MPI_STARTUP_PREFIX +
+                         r'(?P<mpi_var>I_MPI.*?)=(?P<mpi_value>.*)')
+
+
+def Install(vm) -> None:
+  """See base class."""
+  # Installs imb, which installs the specified MPI library and compiles
+  # the patched MPI benchmark appropriately for the specified MPI library.
+  vm.Install('imb')
+  VerifyInstall([vm])
+  logging.info('Successfully installed MPI on %s', vm)
+
+
+def VerifyInstall(vms) -> None:
+  """Runs a simple test to confirm MPI is installed correctly.
+
+  Args:
+    vms: List of virtual machines to include in the test.
+  """
+  request = MpiRequest(
+      vms=vms,
+      total_processes=vms[0].NumCpusForBenchmark(),
+      suite='IMB-MPI1',
+      tests=['PingPong'],
+      ppn=vms[0].NumCpusForBenchmark(),
+      msglog_min=10,
+      msglog_max=11,
+      timeout=20,
+      off_cache_size=-1,
+      off_cache_line_size=None,
+      iterations=100,
+      include_zero_byte=False,
+      compile_from_source=True,
+      record_latencies=False,
+      multi=True)
+  RunMpiStats(vms[0], request)
+
+
+def GetMpiVersion(vm) -> Optional[str]:
+  """Returns the MPI version to use for the given OS type."""
+  if FLAGS.mpi_vendor == 'intel':
+    return intelmpi.MPI_VERSION.value
+  elif FLAGS.mpi_vendor == 'openmpi':
+    return openmpi.GetMpiVersion(vm)
+
+
+def RunMpiStats(vm, request: MpiRequest) -> MpiResponse:
+  """Runs the MPI tests.
+
+  The args field of the returned MpiResponse holds all of the command line
+  arguments used to run the test except for the names of the hosts, so that
+  results in the database share a common value to filter on.
+
+  Args:
+    vm: Virtual machine to run on.
+    request: An MpiRequest that has the parameters for this test.
+
+  Returns:
+    MpiResponse with the parsed results.
+  """
+  hosts = [vm.internal_ip for vm in request.vms]
+
+  mpirun = imb.MpiRunCommand(vm, hosts, request.total_processes, request.ppn,
+                             request.environment, request.global_environment,
+                             request.tune)
+  if request.record_latencies:
+    latency_file = '/tmp/latency-{}-{}.txt'.format(request.tests[0],
+                                                   uuid.uuid4().hex[:8])
+  else:
+    latency_file = None
+  common = ' '.join(
+      BuildMpiBenchmarkArgs(request, latency_file, bool(request.ppn)))
+  try:
+    stdout, stderr = vm.RobustRemoteCommand(mpirun + ' ' + common)
+  except errors.VirtualMachine.RemoteCommandError:
+    # Tail the last 100 lines of syslog as it might tell us something.
+    for client_vm in request.vms:
+      logging.info('VM syslog for %s', client_vm.name)
+      client_vm.RemoteCommand(
+          'sudo tail -n 100 /var/log/syslog /var/log/messages || exit')
+    raise
+  if stderr:
+    # SSH displays a warning but this could also contain mpirun errors.
+    logging.warning('Stderr when running MPI command: %s', stderr)
+  lines = stdout.splitlines()
+  results = list(MpiResultParser(lines))
+  if latency_file:
+    latencies = _GroupLatencyLines(vm, latency_file, request.iterations)
+    if latencies:
+      _CreateMpiDataForHistogram(latencies, results)
+  return MpiResponse(
+      mpi_run=mpirun,
+      args=common,
+      vendor=FLAGS.mpi_vendor,
+      version=GetMpiVersion(vm),
+      results=results,
+      mpi_pinning=omb.ParseMpiPinning(lines),
+      mpi_env=ParseMpiEnv(lines))
+
+
+class MpiResultParser(Iterable[MpiResult]):
+  """Parses the output of the MPI tests.
+
+  This is an iterator where each next item is an MpiResult.
+  """
+  _NAME = re.compile('^# Benchmarking (.*)')
+  _GROUP1 = re.compile(
+      r'.*?(?P<groups>\d+) groups of (?P<processes>\d+) processes')
+  _GROUP2 = re.compile(r'# #processes = (?P<processes>\d+)')
+  _GROUP_LAYOUT = re.compile(r'# Group\s+(\d+):\s+(.*)')
+  _GROUP_LAYOUT_FOLLOWON = re.compile(r'#\s+(\d+[\s\d]*)')
+  _HEADERS = re.compile(r'.*#repetitions.*usec')
+  _TIMEOUT = re.compile(r'(\d+) time-out')
+  _MODE = re.compile(r'#\s+MODE: (\S+)')
+  # for "t[usec]": https://software.intel.com/en-us/imb-user-guide-put-all-local
+  _MPI_HEADER_MAPPING = {
+      '#bytes': 'bytes',
+      '#repetitions': 'repetitions',
+      't_min[usec]': 'time_min',
+      't_max[usec]': 'time_max',
+      't_avg[usec]': 'time_avg',
+      'Mbytes/sec': 'throughput',
+      'Msg/sec': 'messages_per_sec',
+      't_ovrl[usec]': 'time_overall',
+      't_pure[usec]': 'time_pure',
+      't_CPU[usec]': 'time_cpu',
+      'overlap[%]': 'overlap_percent',
+      't[usec]': 'time_avg',
+  }
+  # These columns are integers; the others are floats.
+  _INT_COLUMNS = set(['bytes', 'repetitions'])
+
+  def __init__(self, lines: Sequence[str]):
+    # _lines is an iterator over the input parameter lines.
+    self._lines = (line.strip() for line in lines)
+
+  def __iter__(self) -> Iterator[MpiResult]:
+    """Yields the next MpiResult from the input lines."""
+    while True:
+      value = self._NextValue()
+      if value:
+        yield value
+      else:
+        break
+
+  def _NextValue(self) -> Optional[MpiResult]:
+    """Returns the next MpiResult or None if no more entries."""
+    name = self._BenchmarkName()
+    if not name:
+      return None
+    logging.info('Parsing benchmark %s', name)
+    groups, processes = self._NumberGroups()
+    group_layout = self._GroupLayout()
+    mode, headers = self._Headers()
+    data = []
+    # If the previous run timed out, don't record the bogus latency numbers.
+    last_row_is_error: bool = False
+    for row in self._Data(headers):
+      if not last_row_is_error:
+        data.append(row)
+      last_row_is_error = row.is_error
+    return MpiResult(name, data, groups, processes, mode, group_layout)
+
+  def _BenchmarkName(self) -> Optional[str]:
+    for line in
self._lines: + m = self._NAME.match(line) + if m: + return m.group(1) + + def _NumberGroups(self) -> Tuple[Optional[int], int]: + """Return a tuple of the number of MPI groups and processes for the test.""" + for line in self._lines: + m = self._GROUP1.match(line) + if m: + # this MPI test has both the "groups" and "processes" attributes + return int(m.group('groups')), int(m.group('processes')) + m = self._GROUP2.match(line) + if m: + # This MPI test does not have a "groups" attribute, but "processes". + return None, int(m.group('processes')) + raise errors.Benchmarks.RunError('Did not find number of processes') + + def _GroupLayout(self) -> Optional[Dict[int, List[int]]]: + """Returns the MPI group CPU layout. + + Parses this input: + + # Group 0: 0 1 + # + # Group 1: 2 3 + # + #--------------------------------------------------- + + Into {0: [0,1], 1: [2,3]} + """ + layout = {} + last_group_number = -1 # to satisfy pytyping + for line in self._lines: + m = self._GROUP_LAYOUT.match(line) + if not m and not layout: + # no group layout in this output + return None + if m: + last_group_number = int(m.group(1)) + layout[last_group_number] = [int(cpu) for cpu in m.group(2).split()] + continue + # check for a continuation of the list of cpus + m = self._GROUP_LAYOUT_FOLLOWON.match(line) + if m: + layout[last_group_number] = [int(cpu) for cpu in m.group(1).split()] + continue + if not re.match(r'^#\s*$', line): + # Only other acceptable line is blank + break + return layout + + def _Headers(self) -> Tuple[Optional[str], Sequence[str]]: + """Returns a tuple of (benchmark mode, List of headers for data).""" + mode = None + for line in self._lines: + m = self._MODE.match(line) + if m: + mode = m.group(1) + continue + m = self._HEADERS.match(line) + if m: + return mode, line.split() + raise errors.Benchmarks.RunError('No headers found') + + def _Data(self, headers: Sequence[str]) -> Iterator[MpiData]: + """Yields MpiData for each row of a benchmark's results. + + Example input: + 0 1000000 1.17 1.17 1.17 0.00 + [ 0.83, 0.97, 0.98, 1.00, 1.02, 1.72, 1.75, 2.28, 3.12, 6.73, 65.19 ] + 1024 1000000 1.80 1.80 1.80 569.96 + [ 1.16, 1.27, 1.29, 1.81, 2.06, 2.17, 2.40, 3.46, 4.34, 10.27, 215.10 ] + + Will yield 2 MpiData records + + Args: + headers: The headers for this row of data. + """ + # Keep the last non-latency data row as the next row might contain the + # percent latency numbers for it. + on_deck: MpiData = None + for line in self._lines: + if not line: + break + m = self._TIMEOUT.match(line) + if m: + # This is a timeout error + if on_deck: # emit the last row if available + yield on_deck + yield MpiData(is_error=True, bytes=int(m.group(1))) + on_deck: MpiData = None + elif line.startswith('['): + # This is [p_min, p10, p..] 
list of latencies + values: List[float] = [ + float(part.strip()) for part in line[1:-1].split(',') + ] + percentiles: Dict[str, float] = dict(zip(LATENCY_HEADERS, values)) + if not on_deck: + logging.warning('Have percentiles but no previous mpidata %s', + percentiles) + continue + if sum(values) == 0.0: + # only tests that have been patched have the percentile metrics + logging.info('No percentiles data for benchmark') + else: + on_deck.data.update(percentiles) + yield on_deck + on_deck: MpiData = None + else: + # This is the regular MPI output of time_avg + if on_deck: + yield on_deck + data = self._DataIntoMap(headers, line.split()) + number_bytes = data.pop('bytes', 0) + repetitions = data.pop('repetitions', -1) + on_deck = MpiData( + bytes=number_bytes, repetitions=repetitions, data=data) + if on_deck: + # Last record in this stanza was a normal MPI row. + yield on_deck + + def _DataIntoMap(self, headers: Sequence[str], + data: Sequence[str]) -> Dict[str, Union[int, float]]: + """Converts the a row of data from the MPI results into a dict. + + Args: + headers: The column headers. + data: A row of data from the MPI output. + + Returns: + Dict of the header name to the value. + """ + row = {} + for raw_header, raw_value in zip(headers, data): + new_header = self._MPI_HEADER_MAPPING[raw_header] + row[new_header] = self._ConvertValue(new_header, raw_value) + return row + + def _ConvertValue(self, header: str, value: str) -> Union[int, float]: + return int(value) if header in self._INT_COLUMNS else float(value) + + +def BuildMpiBenchmarkArgs(request: MpiRequest, latency_file: Optional[str], + ppn_set: bool) -> List[str]: + """Creates the common arguments to pass to mpirun. + + See https://software.intel.com/en-us/imb-user-guide-command-line-control + + Args: + request: An MpiRequest object for the run's configuration. + latency_file: If present the output file to record the individual packet + latencies. + ppn_set: Whether this benchmark was run with a set ppn. + + Returns: + List of string arguments for mpirun. + """ + args: List[str] = [] + if request.compile_from_source: + args.append(posixpath.join('mpi-benchmarks', request.suite)) + else: + args.append(request.suite) + # only add -msglog if told to do so + if request.suite in ('IMB-MPI1', 'IMB-RMA', + 'IMB-NBC') and request.msglog_max is not None: + if request.msglog_min is None: + arg = request.msglog_max + else: + arg = '{}:{}'.format(request.msglog_min, request.msglog_max) + args.append('-msglog {}'.format(arg)) + if request.suite != 'IMB-MT': + # -multi is trinary: not present, 0, 1 + if request.multi: + args.append('-multi 0') + args.append('-time {}'.format(request.timeout)) + # only add -off_cache if told to do so + if request.off_cache_size: + arg = '-off_cache {}'.format(request.off_cache_size) + if request.off_cache_line_size: + arg += ',{}'.format(request.off_cache_line_size) + args.append(arg) + args.append('-iter {}'.format(request.iterations)) + if request.npmin is not None: + args.append(f'-npmin {request.npmin}') + # Setting iter_policy to off to collect the same number of samples every time. 
+ args.append('-iter_policy off') + if not request.include_zero_byte: + args.append('-zero_size off') + # MPI benchmark tests will ignore this option if not present + args.append('-show_tail yes') + if latency_file: + args.append(f'-dumpfile {latency_file}') + if not ppn_set: + # only use -map if the --mpi_ppn was not set + number_hosts = len(request.vms) + processes_per_host = request.total_processes // number_hosts + args.append(f'-map {processes_per_host}x{number_hosts}') + args.extend(request.tests) + return args + + +def _CreateMpiDataForHistogram(grouped_lines: List[List[str]], + results: List[MpiResult]) -> None: + """Adds histogram data from the histogram file to existing data. + + The MPI parsed results are passed in as some benchmarks runs can do many + sub-runs of different MPI group values. This code pairs up those runs done + in order with the latency file that has all the runs concatenated together. + + Args: + grouped_lines: The histogram text file lines grouped by sub-run. + results: The parsed MPI results from the non-histogram data. + """ + acceptable_mpi_data: List[MpiData] = [] + # MPI runs that time out should not have histogram data associated with it. + for result in results: + acceptable_mpi_data.extend( + mpi_data for mpi_data in result.data if not mpi_data.is_error) + histograms: List[MpiData] = [] + for lines in grouped_lines: + histograms.extend(_CombineHistogramEntries(lines)) + if _MpiHistogramAcceptable(acceptable_mpi_data, histograms): + for mpi_data, histogram in zip(acceptable_mpi_data, histograms): + mpi_data.histogram = histogram.histogram + + +def _MpiHistogramAcceptable(mpi_data: List[MpiData], + histograms: List[MpiData]) -> bool: + """Returns whether the parsed MpiResults MpiData matches with the histograms. + + Criteria: + Number of MpiResults.data[] entries are the same. + The number of bytes for each MpiData matches. + The number of repetitions for each MpiData matches. + + Args: + mpi_data: List of MpiData parsed for this run. + histograms: List of MpiData histograms parsed for this run. + """ + if len(mpi_data) != len(histograms): + logging.warning('Have %s parsed MPI data but only %s histograms', + len(mpi_data), len(histograms)) + return False + for mpi_data, histogram in zip(mpi_data, histograms): + bytes_same = mpi_data.bytes == histogram.bytes + repetitions_same = mpi_data.repetitions == histogram.repetitions + if not bytes_same or not repetitions_same: + logging.warning('Parsed MPI data %s does not match with histogram %s', + mpi_data, histogram) + return False + return True + + +def _CombineHistogramEntries(lines: Iterable[str]) -> Iterator[MpiData]: + """Converts the -dumpfile latency file into MpiData. + + The latency file lines are in this form: + integer_bytes latency_usec + For example this is for a run with one latency value of 11.0usec for the + bytes=1024 run and three values for bytes=2048 of 12.1,13.5, and 13.5 usec: + 1024 11 + 2048 12.1 + 2048 13.5 + 2048 13.5 + + The number of MpiDatas returned is equal to the unique number of bytes=### + runs in the input. The MpiData's "histogram" field will be populated with a + dict where the key is the latency in microseconds and the value is the number + of times that latency has been seen. + + Args: + lines: The lines from the latency dump file. + + Yields: + An MpiData that has the histogram of latencies for all runs of a particular + number of bytes. 
+ """ + latencies = collections.defaultdict(list) + for line in lines: + # format of file is "integer_bytes latency_usec" + parts = line.strip().split() + if len(parts) == 2: + latencies[int(parts[0])].append(float(parts[1])) + else: + logging.warning('Latency file line "%s" should have two parts', line) + if not latencies: + logging.warning('No latency entries found') + for number_bytes, times in sorted(latencies.items()): + histogram = collections.Counter() + for item in times: + # Round the sub-microsecond latency based on the latency value to reduce + # the number of latency histogram keys. + # Under 5 usec: 0.01usec accuracy. 5-40 usec: 0.1usec, 40+ usec: 1usec + if item < 5: + item = round(item, 2) + elif item < 40: + item = round(item, 1) + else: + item = round(item, 0) + histogram[item] += 1 + yield MpiData( + bytes=number_bytes, + histogram=dict(histogram), + repetitions=sum(histogram.values())) + + +def _GroupLatencyLines(vm, latency_file: str, + packets_per_run: int) -> List[List[str]]: + r"""Parses the histogram latency file copied from the remote VM. + + The latency file contains multiple sub-runs concatenated together. Each of + those runs is of length packets_per_run. The returned file is chunked into + groups of that size. + + Example: ("1\n2\n3\n4\n5\n6", 2) => [["1","2"],["3","4"],["5","6"]] + + Args: + vm: The virtual machine that has the histogram file. + latency_file: Path to the latency file on the VM. + packets_per_run: The number of packets (lines) for each test run. + + Returns: + List of lists of strings of length packets_per_run or an empty list if there + is a problem dividing up the lines into groups. + """ + local_file: str = os.path.join(temp_dir.GetRunDirPath(), + os.path.basename(latency_file)) + if vm.TryRemoteCommand(f'test -f {latency_file}'): + vm.PullFile(local_file, latency_file) + else: + logging.warning('Skipping gathering latency as %s file missing', + latency_file) + return [] + with open(local_file) as reader: + lines = [line.strip() for line in reader.readlines()] + number_groups = len(lines) // packets_per_run + if packets_per_run * number_groups != len(lines): + logging.warning('File %s has %s lines, cannot be divided into size %s', + local_file, len(lines), packets_per_run) + return [] + return [ + lines[i:i + packets_per_run] + for i in range(0, len(lines), packets_per_run) + ] + + +def ParseMpiEnv(lines: Sequence[str]) -> Dict[str, str]: + """Reads the log file for environment parameters. + + Args: + lines: Text lines from mpirun output. 
+ + Returns: + Dict of the MPI envirnoment variables + """ + mpi_env = {} + for line in lines: + row = _MPI_ENV_RE.search(line) + if not row: + continue + mpi_env[row['mpi_var']] = row['mpi_value'] + return mpi_env diff --git a/tests/data/mpi/mpi_allgather_output.txt b/tests/data/mpi/mpi_allgather_output.txt new file mode 100644 index 0000000000..d096c743ed --- /dev/null +++ b/tests/data/mpi/mpi_allgather_output.txt @@ -0,0 +1,324 @@ +#------------------------------------------------------------ +# Intel(R) MPI Benchmarks 2019 Update 6, MPI-1 part +#------------------------------------------------------------ +# Date : Sat Apr 10 00:55:13 2021 +# Machine : x86_64 +# System : Linux +# Release : 3.10.0-1160.15.2.el7.x86_64 +# Version : #1 SMP Wed Feb 3 15:06:38 UTC 2021 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# mpi-benchmarks/IMB-MPI1 -msglog 9:9 -multi 0 -time 60 -off_cache -1 -iter 100000 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-Allgather-1dabaabb.txt Allgather + +# Minimum message length in bytes: 512 +# Maximum message length in bytes: 512 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# Allgather + +#---------------------------------------------------------------- +# Benchmarking Multi-Allgather +# ( 60 groups of 2 processes each running simultaneous ) +# Group 0: 0 1 +# +# Group 1: 2 3 +# +# Group 2: 4 5 +# +# Group 3: 6 7 +# +# Group 4: 8 9 +# +# Group 5: 10 11 +# +# Group 6: 12 13 +# +# Group 7: 14 15 +# +# Group 8: 16 17 +# +# Group 9: 18 19 +# +# Group 10: 20 21 +# +# Group 11: 22 23 +# +# Group 12: 24 25 +# +# Group 13: 26 27 +# +# Group 14: 28 29 +# +# Group 15: 30 31 +# +# Group 16: 32 33 +# +# Group 17: 34 35 +# +# Group 18: 36 37 +# +# Group 19: 38 39 +# +# Group 20: 40 41 +# +# Group 21: 42 43 +# +# Group 22: 44 45 +# +# Group 23: 46 47 +# +# Group 24: 48 49 +# +# Group 25: 50 51 +# +# Group 26: 52 53 +# +# Group 27: 54 55 +# +# Group 28: 56 57 +# +# Group 29: 58 59 +# +# Group 30: 60 61 +# +# Group 31: 62 63 +# +# Group 32: 64 65 +# +# Group 33: 66 67 +# +# Group 34: 68 69 +# +# Group 35: 70 71 +# +# Group 36: 72 73 +# +# Group 37: 74 75 +# +# Group 38: 76 77 +# +# Group 39: 78 79 +# +# Group 40: 80 81 +# +# Group 41: 82 83 +# +# Group 42: 84 85 +# +# Group 43: 86 87 +# +# Group 44: 88 89 +# +# Group 45: 90 91 +# +# Group 46: 92 93 +# +# Group 47: 94 95 +# +# Group 48: 96 97 +# +# Group 49: 98 99 +# +# Group 50: 100 101 +# +# Group 51: 102 103 +# +# Group 52: 104 105 +# +# Group 53: 106 107 +# +# Group 54: 108 109 +# +# Group 55: 110 111 +# +# Group 56: 112 113 +# +# Group 57: 114 115 +# +# Group 58: 116 117 +# +# Group 59: 118 119 +# +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 30.48 37.96 33.80 +[ 1.91, 21.93, 27.89, 33.14, 40.05, 47.92, 53.17, 67.00, 74.15, 108.00, 1500.00, 1789.09 ] + +#---------------------------------------------------------------- +# Benchmarking Multi-Allgather +# ( 30 groups of 4 processes each running simultaneous ) +# Group 0: 0 1 2 3 +# +# Group 1: 4 5 6 7 +# +# Group 2: 8 9 10 11 +# +# Group 3: 12 13 14 15 +# +# Group 4: 16 17 18 19 +# +# Group 5: 20 21 22 23 +# +# Group 6: 24 25 26 27 +# +# Group 7: 28 29 30 31 +# +# Group 8: 32 33 34 35 +# +# Group 9: 36 37 38 39 +# +# Group 10: 40 41 42 43 +# +# Group 11: 44 45 46 47 +# +# Group 12: 48 49 50 51 +# +# Group 13: 52 53 54 55 +# +# Group 14: 56 57 58 
59 +# +# Group 15: 60 61 62 63 +# +# Group 16: 64 65 66 67 +# +# Group 17: 68 69 70 71 +# +# Group 18: 72 73 74 75 +# +# Group 19: 76 77 78 79 +# +# Group 20: 80 81 82 83 +# +# Group 21: 84 85 86 87 +# +# Group 22: 88 89 90 91 +# +# Group 23: 92 93 94 95 +# +# Group 24: 96 97 98 99 +# +# Group 25: 100 101 102 103 +# +# Group 26: 104 105 106 107 +# +# Group 27: 108 109 110 111 +# +# Group 28: 112 113 114 115 +# +# Group 29: 116 117 118 119 +# +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 64.04 92.11 75.38 +[ 9.06, 43.87, 56.03, 72.96, 92.98, 113.96, 128.03, 157.12, 171.18, 253.20, 1500.00, 6736.99 ] + +#---------------------------------------------------------------- +# Benchmarking Multi-Allgather +# ( 15 groups of 8 processes each running simultaneous ) +# Group 0: 0 1 2 3 4 5 6 7 +# +# Group 1: 8 9 10 11 12 13 14 15 +# +# Group 2: 16 17 18 19 20 21 22 23 +# +# Group 3: 24 25 26 27 28 29 30 31 +# +# Group 4: 32 33 34 35 36 37 38 39 +# +# Group 5: 40 41 42 43 44 45 46 47 +# +# Group 6: 48 49 50 51 52 53 54 55 +# +# Group 7: 56 57 58 59 60 61 62 63 +# +# Group 8: 64 65 66 67 68 69 70 71 +# +# Group 9: 72 73 74 75 76 77 78 79 +# +# Group 10: 80 81 82 83 84 85 86 87 +# +# Group 11: 88 89 90 91 92 93 94 95 +# +# Group 12: 96 97 98 99 100 101 102 103 +# +# Group 13: 104 105 106 107 108 109 110 111 +# +# Group 14: 112 113 114 115 116 117 118 119 +# +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 69.61 97.04 82.71 +[ 14.07, 48.88, 63.90, 81.06, 101.09, 120.88, 133.99, 166.89, 189.07, 357.87, 1500.00, 7099.87 ] + +#---------------------------------------------------------------- +# Benchmarking Multi-Allgather +# ( 7 groups of 16 processes each running simultaneous ) +# Group 0: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +# +# Group 1: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 +# +# Group 2: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 +# +# Group 3: 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 +# +# Group 4: 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 +# +# Group 5: 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 +# +# Group 6: 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 +# +# ( 8 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 77.43 93.56 84.53 +[ 15.97, 53.17, 67.00, 82.02, 97.99, 114.92, 128.03, 158.07, 179.05, 507.83, 1500.00, 7114.89 ] + +#---------------------------------------------------------------- +# Benchmarking Multi-Allgather +# ( 3 groups of 32 processes each running simultaneous ) +# Group 0: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +# 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 +# +# Group 1: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 +# 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 +# +# Group 2: 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 +# 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 +# +# ( 24 additional processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 84.96 93.30 89.08 +[ 20.98, 56.98, 70.10, 86.07, 103.95, 123.02, 136.14, 174.05, 247.00, 1224.04, 1500.00, 11262.89 ] + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 64 +# ( 56 additional 
processes waiting in MPI_Barrier) +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 89.38 135.46 113.14 +[ 70.81, 116.11, 120.16, 125.89, 133.99, 145.91, 154.97, 183.11, 231.98, 1411.91, 1500.00, 6277.08 ] + +#---------------------------------------------------------------- +# Benchmarking Allgather +# #processes = 120 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 512 100000 127.62 227.38 177.07 +[ 139.00, 195.03, 205.99, 219.11, 233.89, 252.96, 263.93, 293.97, 313.04, 441.07, 1500.00, 1156.81 ] + + +# All processes entering MPI_Finalize + diff --git a/tests/data/mpi/mpi_allgather_parsed.json b/tests/data/mpi/mpi_allgather_parsed.json new file mode 100644 index 0000000000..084afd7ab0 --- /dev/null +++ b/tests/data/mpi/mpi_allgather_parsed.json @@ -0,0 +1,233 @@ +[ + { + "benchmark": "Multi-Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 30.48, + "time_max": 37.96, + "time_avg": 33.8, + "latency_min": 1.91, + "latency_p10": 21.93, + "latency_p25": 27.89, + "latency_p50": 33.14, + "latency_p75": 40.05, + "latency_p90": 47.92, + "latency_p95": 53.17, + "latency_p99": 67.0, + "latency_p99.5": 74.15, + "latency_p99.9": 108.0, + "latency_p99.99": 1500.00, + "latency_max": 1789.09 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": {"0": [0, 1], "1": [2, 3], "2": [4, 5], "3": [6, 7], "4": [8, 9], "5": [10, 11], "6": [12, 13], "7": [14, 15], "8": [16, 17], "9": [18, 19], "10": [20, 21], "11": [22, 23], "12": [24, 25], "13": [26, 27], "14": [28, 29], "15": [30, 31], "16": [32, 33], "17": [34, 35], "18": [36, 37], "19": [38, 39], "20": [40, 41], "21": [42, 43], "22": [44, 45], "23": [46, 47], "24": [48, 49], "25": [50, 51], "26": [52, 53], "27": [54, 55], "28": [56, 57], "29": [58, 59], "30": [60, 61], "31": [62, 63], "32": [64, 65], "33": [66, 67], "34": [68, 69], "35": [70, 71], "36": [72, 73], "37": [74, 75], "38": [76, 77], "39": [78, 79], "40": [80, 81], "41": [82, 83], "42": [84, 85], "43": [86, 87], "44": [88, 89], "45": [90, 91], "46": [92, 93], "47": [94, 95], "48": [96, 97], "49": [98, 99], "50": [100, 101], "51": [102, 103], "52": [104, 105], "53": [106, 107], "54": [108, 109], "55": [110, 111], "56": [112, 113], "57": [114, 115], "58": [116, 117], "59": [118, 119]}, + "groups": 60, + "processes_per_group": 2, + "mode": null + }, + { + "benchmark": "Multi-Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 64.04, + "time_max": 92.11, + "time_avg": 75.38, + "latency_min": 9.06, + "latency_p10": 43.87, + "latency_p25": 56.03, + "latency_p50": 72.96, + "latency_p75": 92.98, + "latency_p90": 113.96, + "latency_p95": 128.03, + "latency_p99": 157.12, + "latency_p99.5": 171.18, + "latency_p99.9": 253.2, + "latency_p99.99": 1500.00, + "latency_max": 6736.99 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": {"0": [0, 1, 2, 3], "1": [4, 5, 6, 7], "10": [40, 41, 42, 43], "11": [44, 45, 46, 47], "12": [48, 49, 50, 51], "13": [52, 53, 54, 55], "14": [56, 57, 58, 59], "15": [60, 61, 62, 63], "16": [64, 65, 66, 67], "17": [68, 69, 70, 71], "18": [72, 73, 74, 75], "19": [76, 77, 78, 79], "2": [8, 9, 10, 11], "20": [80, 81, 82, 83], "21": [84, 85, 86, 87], "22": [88, 89, 90, 91], "23": [92, 93, 94, 95], "24": [96, 97, 98, 99], "25": [100, 101, 102, 103], "26": [104, 105, 106, 107], "27": [108, 109, 110, 
111], "28": [112, 113, 114, 115], "29": [116, 117, 118, 119], "3": [12, 13, 14, 15], "4": [16, 17, 18, 19], "5": [20, 21, 22, 23], "6": [24, 25, 26, 27], "7": [28, 29, 30, 31], "8": [32, 33, 34, 35], "9": [36, 37, 38, 39]}, + "groups": 30, + "processes_per_group": 4, + "mode": null + }, + { + "benchmark": "Multi-Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 69.61, + "time_max": 97.04, + "time_avg": 82.71, + "latency_min": 14.07, + "latency_p10": 48.88, + "latency_p25": 63.9, + "latency_p50": 81.06, + "latency_p75": 101.09, + "latency_p90": 120.88, + "latency_p95": 133.99, + "latency_p99": 166.89, + "latency_p99.5": 189.07, + "latency_p99.9": 357.87, + "latency_p99.99": 1500.00, + "latency_max": 7099.87 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": {"0": [0, 1, 2, 3, 4, 5, 6, 7], "1": [8, 9, 10, 11, 12, 13, 14, 15], "2": [16, 17, 18, 19, 20, 21, 22, 23], "3": [24, 25, 26, 27, 28, 29, 30, 31], "4": [32, 33, 34, 35, 36, 37, 38, 39], "5": [40, 41, 42, 43, 44, 45, 46, 47], "6": [48, 49, 50, 51, 52, 53, 54, 55], + "7": [56, 57, 58, 59, 60, 61, 62, 63], "8": [64, 65, 66, 67, 68, 69, 70, 71], "9": [72, 73, 74, 75, 76, 77, 78, 79], "10": [80, 81, 82, 83, 84, 85, 86, 87], + "11": [88, 89, 90, 91, 92, 93, 94, 95], "12": [96, 97, 98, 99, 100, 101, 102, 103], "13": [104, 105, 106, 107, 108, 109, 110, 111], "14": [112, 113, 114, 115, 116, 117, 118, 119]}, + "groups": 15, + "processes_per_group": 8, + "mode": null + }, + { + "benchmark": "Multi-Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 77.43, + "time_max": 93.56, + "time_avg": 84.53, + "latency_min": 15.97, + "latency_p10": 53.17, + "latency_p25": 67.0, + "latency_p50": 82.02, + "latency_p75": 97.99, + "latency_p90": 114.92, + "latency_p95": 128.03, + "latency_p99": 158.07, + "latency_p99.5": 179.05, + "latency_p99.9": 507.83, + "latency_p99.99": 1500.00, + "latency_max": 7114.89 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": {"0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "1": [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], + "2": [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "3": [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], + "4": [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79], "5": [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95], + "6" : [96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111]}, + "groups": 7, + "processes_per_group": 16, + "mode": null + }, + { + "benchmark": "Multi-Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 84.96, + "time_max": 93.3, + "time_avg": 89.08, + "latency_min": 20.98, + "latency_p10": 56.98, + "latency_p25": 70.1, + "latency_p50": 86.07, + "latency_p75": 103.95, + "latency_p90": 123.02, + "latency_p95": 136.14, + "latency_p99": 174.05, + "latency_p99.5": 247.0, + "latency_p99.9": 1224.04, + "latency_p99.99": 1500.00, + "latency_max": 11262.89 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": {"0": [16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], + "1": [48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63], + "2": [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95]}, + "groups": 3, + "processes_per_group": 32, + "mode": null + }, + { + "benchmark": "Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + 
"time_min": 89.38, + "time_max": 135.46, + "time_avg": 113.14, + "latency_min": 70.81, + "latency_p10": 116.11, + "latency_p25": 120.16, + "latency_p50": 125.89, + "latency_p75": 133.99, + "latency_p90": 145.91, + "latency_p95": 154.97, + "latency_p99": 183.11, + "latency_p99.5": 231.98, + "latency_p99.9": 1411.91, + "latency_p99.99": 1500.00, + "latency_max": 6277.08 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": null, + "groups": null, + "processes_per_group": 64, + "mode": null + }, + { + "benchmark": "Allgather", + "data": [ + { + "bytes": 512, + "repetitions": 100000, + "data": { + "time_min": 127.62, + "time_max": 227.38, + "time_avg": 177.07, + "latency_min": 139.0, + "latency_p10": 195.03, + "latency_p25": 205.99, + "latency_p50": 219.11, + "latency_p75": 233.89, + "latency_p90": 252.96, + "latency_p95": 263.93, + "latency_p99": 293.97, + "latency_p99.5": 313.04, + "latency_p99.9": 441.07, + "latency_p99.99": 1500.00, + "latency_max": 1156.81 + }, + "is_error": false, + "histogram": null + } + ], + "group_layout": null, + "groups": null, + "processes_per_group": 120, + "mode": null + } +] diff --git a/tests/data/mpi/mpi_barrier_output.txt b/tests/data/mpi/mpi_barrier_output.txt new file mode 100644 index 0000000000..674c50f5d2 --- /dev/null +++ b/tests/data/mpi/mpi_barrier_output.txt @@ -0,0 +1,51 @@ +#------------------------------------------------------------ +# Intel(R) MPI Benchmarks 2019 Update 6, MPI-1 part +#------------------------------------------------------------ +# Date : Sun Aug 30 21:47:17 2020 +# Machine : x86_64 +# System : Linux +# Release : 4.15.0-1080-gcp +# Version : #90~16.04.1-Ubuntu SMP Fri Jul 10 19:11:10 UTC 2020 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# mpi-benchmarks/IMB-MPI1 -msglog 10:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -iter_policy off -zero_size off barrier + +# Minimum message length in bytes: 1024 +# Maximum message length in bytes: 2048 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# Barrier + +#--------------------------------------------------- +# Benchmarking Multi-Barrier +# ( 2 groups of 2 processes each running simultaneous ) +# Group 0: 0 1 +# +# Group 1: 2 3 +# +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 100000 80.75 81.05 80.90 + +#--------------------------------------------------- +# Benchmarking Barrier +# #processes = 4 +#--------------------------------------------------- + #repetitions t_min[usec] t_max[usec] t_avg[usec] + 100000 91.24 91.24 91.24 + + +# All processes entering MPI_Finalize + + diff --git a/tests/data/mpi/mpi_barrier_parsed.json b/tests/data/mpi/mpi_barrier_parsed.json new file mode 100644 index 0000000000..d94b0ff6c5 --- /dev/null +++ b/tests/data/mpi/mpi_barrier_parsed.json @@ -0,0 +1,40 @@ +[ + { + "data":[ + { + "data":{ + "time_min":80.75, + "time_max":81.05, + "time_avg":80.9 + }, + "is_error":false, + "repetitions":100000, + "bytes":0 + } + ], + "processes_per_group":2, + "benchmark":"Multi-Barrier", + "mode":null, + "group_layout": {"0": [0, 1], "1": [2, 3]}, + "groups":2 + }, + { + "data":[ + { + "data":{ + "time_min":91.24, + "time_max":91.24, + "time_avg":91.24 + }, + "is_error":false, + "repetitions":100000, + "bytes":0 + } + ], + "processes_per_group":4, + "benchmark":"Barrier", + "mode":null, + "group_layout":null, + "groups":null + } +] diff --git 
a/tests/data/mpi/mpi_debug_output.txt b/tests/data/mpi/mpi_debug_output.txt new file mode 100644 index 0000000000..af214f84e1 --- /dev/null +++ b/tests/data/mpi/mpi_debug_output.txt @@ -0,0 +1,11 @@ +[0] MPI startup(): libfabric version: 1.9.0a1-impi +[0] MPI startup(): libfabric provider: tcp;ofi_rxm +[0] MPI startup(): Rank Pid Node name Pin cpu +[0] MPI startup(): 0 18790 pkb-f92e1167-0 {0,1,2,3} +[0] MPI startup(): 1 18944 pkb-f92e1167-1 {0,1,2,3} +[0] MPI startup(): I_MPI_ROOT=/opt/intel/compilers_and_libraries_2020.0.166/linux/mpi +[0] MPI startup(): I_MPI_MPIRUN=mpirun +[0] MPI startup(): I_MPI_HYDRA_TOPOLIB=hwloc +[0] MPI startup(): I_MPI_INTERNAL_MEM_POLICY=default +[0] MPI startup(): I_MPI_DEBUG=5 +#------------------------------------------------------------ diff --git a/tests/data/mpi/mpi_latencies_output.txt b/tests/data/mpi/mpi_latencies_output.txt new file mode 100644 index 0000000000..61b53d0ea8 --- /dev/null +++ b/tests/data/mpi/mpi_latencies_output.txt @@ -0,0 +1,42 @@ +#------------------------------------------------------------ +# Intel(R) MPI Benchmarks 2019 Update 6, MPI-1 part +#------------------------------------------------------------ +# Date : Mon Jul 6 17:04:56 2020 +# Machine : x86_64 +# System : Linux +# Release : 5.3.0-1026-gcp +# Version : #28~18.04.1-Ubuntu SMP Sat Jun 6 00:09:26 UTC 2020 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# IMB-MPI1 pingpong -msglog 10:10 -multi 0 -show_tail yes -dumpfile /tmp/dump.txt -iter 1000000 -iter_policy off + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 1024 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong + +#----------------------------------------------------------------------------- +# Benchmarking PingPong +# #processes = 2 +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 0 1000000 1.17 1.17 1.17 0.00 +[ 0.83, 0.97, 0.98, 1.00, 1.02, 1.72, 1.75, 2.28, 3.12, 6.73, 50.00, 65.19 ] + 1024 1000000 1.80 1.80 1.80 569.96 +[ 1.16, 1.27, 1.29, 1.81, 2.06, 2.17, 2.40, 3.46, 4.34, 10.27, 50.00, 215.10 ] + + +# All processes entering MPI_Finalize + diff --git a/tests/data/mpi/mpi_latencies_parsed.json b/tests/data/mpi/mpi_latencies_parsed.json new file mode 100644 index 0000000000..7bf705ea50 --- /dev/null +++ b/tests/data/mpi/mpi_latencies_parsed.json @@ -0,0 +1,56 @@ +[ + { + "data":[ + { + "data":{ + "latency_min":0.83, + "time_avg":1.17, + "latency_p99.5":3.12, + "latency_p99.99":50.00, + "time_max":1.17, + "latency_p99":2.28, + "latency_p10":0.97, + "latency_p75":1.02, + "latency_p95":1.75, + "latency_p50":1.0, + "latency_p99.9":6.73, + "latency_p90":1.72, + "throughput":0.0, + "latency_max":65.19, + "time_min":1.17, + "latency_p25":0.98 + }, + "is_error":false, + "repetitions":1000000, + "bytes":0 + }, + { + "data":{ + "latency_min":1.16, + "time_avg":1.8, + "latency_p99.5":4.34, + "latency_p99.99":50.00, + "time_max":1.8, + "latency_p99":3.46, + "latency_p10":1.27, + "latency_p75":2.06, + "latency_p95":2.4, + "latency_p50":1.81, + "latency_p99.9":10.27, + "latency_p90":2.17, + "throughput":569.96, + "latency_max":215.1, + "time_min":1.8, + "latency_p25":1.29 + }, + "is_error":false, + "repetitions":1000000, + "bytes":1024 + } + ], + "processes_per_group":2, + "benchmark":"PingPong", + "mode":null, + "groups":null + } +] diff --git 
a/tests/data/mpi/mpi_one_put_all_output.txt b/tests/data/mpi/mpi_one_put_all_output.txt new file mode 100644 index 0000000000..d5e8d4fb7c --- /dev/null +++ b/tests/data/mpi/mpi_one_put_all_output.txt @@ -0,0 +1,28 @@ +# mpi-benchmarks/IMB-RMA -msglog 0:0 -multi 0 -time 60 -off_cache -1 -iter 10 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-One_put_all-839321dc.txt One_put_all + +# Minimum message length in bytes: 0 +# Maximum message length in bytes: 1 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# One_put_all +Invalid benchmark name -zero_size +Invalid benchmark name off +Invalid benchmark name -show_tail +Invalid benchmark name yes +Invalid benchmark name -dumpfile +Invalid benchmark name /tmp/latency-one_put_all-839321dc.txt + +#--------------------------------------------------- +# Benchmarking One_put_all +# #processes = 2 +#--------------------------------------------------- + #bytes #repetitions t[usec] Mbytes/sec + 0 10 0.10 0.00 + 1 10 10.01 0.10 diff --git a/tests/data/mpi/mpi_one_put_all_parsed.json b/tests/data/mpi/mpi_one_put_all_parsed.json new file mode 100644 index 0000000000..c67006c5c1 --- /dev/null +++ b/tests/data/mpi/mpi_one_put_all_parsed.json @@ -0,0 +1,31 @@ +[ + { + "benchmark": "One_put_all", + "data": [ + { + "bytes": 0, + "repetitions": 10, + "data": { + "time_avg": 0.1, + "throughput": 0.0 + }, + "is_error": false, + "histogram": null + }, + { + "bytes": 1, + "repetitions": 10, + "data": { + "time_avg": 10.01, + "throughput": 0.1 + }, + "is_error": false, + "histogram": null + } + ], + "groups": null, + "processes_per_group": 2, + "mode": null, + "group_layout": null + } +] diff --git a/tests/data/mpi/mpi_pingpong_output.txt b/tests/data/mpi/mpi_pingpong_output.txt new file mode 100644 index 0000000000..6f5d400e95 --- /dev/null +++ b/tests/data/mpi/mpi_pingpong_output.txt @@ -0,0 +1,44 @@ +#------------------------------------------------------------ +# Intel(R) MPI Benchmarks 2019 Update 6, MPI-1 part +#------------------------------------------------------------ +# Date : Sun Aug 30 18:36:31 2020 +# Machine : x86_64 +# System : Linux +# Release : 4.15.0-1080-gcp +# Version : #90~16.04.1-Ubuntu SMP Fri Jul 10 19:11:10 UTC 2020 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# mpi-benchmarks/IMB-MPI1 -msglog 10:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -iter_policy off -zero_size off pingpong + +# Minimum message length in bytes: 1024 +# Maximum message length in bytes: 2048 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# PingPong + +#----------------------------------------------------------------------------- +# Benchmarking Multi-PingPong +# ( 2 groups of 2 processes each running simultaneous ) +# Group 0: 0 1 +# +# Group 1: 2 3 +# +#----------------------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec + 1024 100000 56.92 61.39 59.15 16.68 + 2048 100000 64.01 68.79 66.40 29.77 + + +# All processes entering MPI_Finalize + diff --git a/tests/data/mpi/mpi_pingpong_parsed.json b/tests/data/mpi/mpi_pingpong_parsed.json new file mode 100644 index 0000000000..5441343c15 --- /dev/null +++ b/tests/data/mpi/mpi_pingpong_parsed.json @@ -0,0 +1,33 @@ +[ + { + "processes_per_group":2, + "benchmark":"Multi-PingPong", + "mode":null, + "group_layout": {"0": [0, 
1], "1": [2, 3]}, + "groups":2, + "data":[ + { + "data":{ + "throughput":16.68, + "time_min":56.92, + "time_max":61.39, + "time_avg":59.15 + }, + "is_error":false, + "repetitions":100000, + "bytes":1024 + }, + { + "data":{ + "throughput":29.77, + "time_min":64.01, + "time_max":68.79, + "time_avg":66.4 + }, + "is_error":false, + "repetitions":100000, + "bytes":2048 + } + ] + } +] diff --git a/tests/data/mpi/mpi_reduce_output.txt b/tests/data/mpi/mpi_reduce_output.txt new file mode 100644 index 0000000000..18b5c490ca --- /dev/null +++ b/tests/data/mpi/mpi_reduce_output.txt @@ -0,0 +1,41 @@ +#------------------------------------------------------------ +# Intel(R) MPI Benchmarks 2019 Update 6, MPI-1 part +#------------------------------------------------------------ +# Date : Sun Aug 30 22:14:36 2020 +# Machine : x86_64 +# System : Linux +# Release : 4.15.0-1080-gcp +# Version : #90~16.04.1-Ubuntu SMP Fri Jul 10 19:11:10 UTC 2020 +# MPI Version : 3.1 +# MPI Thread Environment: + + +# Calling sequence was: + +# mpi-benchmarks/IMB-MPI1 -msglog 10:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -iter_policy off -zero_size off reduce + +# Minimum message length in bytes: 1024 +# Maximum message length in bytes: 2048 +# +# MPI_Datatype : MPI_BYTE +# MPI_Datatype for reductions : MPI_FLOAT +# MPI_Op : MPI_SUM +# +# + +# List of Benchmarks to run: + +# Reduce + +#---------------------------------------------------------------- +# Benchmarking Reduce +# #processes = 2 +#---------------------------------------------------------------- + #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] + 1024 100000 5.24 15.30 10.27 + 2048 100000 7.70 20.51 14.11 + + +# All processes entering MPI_Finalize + + diff --git a/tests/data/mpi/mpi_reduce_parsed.json b/tests/data/mpi/mpi_reduce_parsed.json new file mode 100644 index 0000000000..b220bb83bd --- /dev/null +++ b/tests/data/mpi/mpi_reduce_parsed.json @@ -0,0 +1,30 @@ +[ + { + "data":[ + { + "data":{ + "time_min":5.24, + "time_max":15.3, + "time_avg":10.27 + }, + "is_error":false, + "repetitions":100000, + "bytes":1024 + }, + { + "data":{ + "time_min":7.7, + "time_max":20.51, + "time_avg":14.11 + }, + "is_error":false, + "repetitions":100000, + "bytes":2048 + } + ], + "processes_per_group":2, + "benchmark":"Reduce", + "mode":null, + "groups":null + } +] diff --git a/tests/data/mpi/mpi_tests_samples.json b/tests/data/mpi/mpi_tests_samples.json new file mode 100644 index 0000000000..ecaf6ce53f --- /dev/null +++ b/tests/data/mpi/mpi_tests_samples.json @@ -0,0 +1,306 @@ +[ + { + "metadata": { + "bytes": 1024, + "compile_from_source": true, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 10:10 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_1.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". 
/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "throughput": 16.68, + "time_avg": 59.15, + "time_max": 61.39, + "time_min": 56.92, + "tune": true + }, + "metric": "time_avg", + "timestamp": 1600999678.0782478, + "unit": "usec", + "value": 59.15 + }, + { + "metadata": { + "bytes": 1024, + "compile_from_source": true, + "histogram": { + "10.0": 50000, + "12.5": 50000 + }, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 10:10 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_1.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". /opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "tune": true + }, + "metric": "MPI_Latency_Histogram", + "timestamp": 1600999678.078312, + "unit": "usec", + "value": 0.0 + }, + { + "metadata": { + "bytes": 2048, + "compile_from_source": true, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 10:10 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_1.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". 
/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "throughput": 29.77, + "time_avg": 66.4, + "time_max": 68.79, + "time_min": 64.01, + "tune": true + }, + "metric": "time_avg", + "timestamp": 1600999678.0782793, + "unit": "usec", + "value": 66.4 + }, + { + "metadata": { + "bytes": 2048, + "compile_from_source": true, + "histogram": { + "50.0": 50000, + "6.0": 50000 + }, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 10:10 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_1.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". /opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "tune": true + }, + "metric": "MPI_Latency_Histogram", + "timestamp": 1601160331.9777563, + "unit": "usec", + "value": 0.0 + }, + { + "metadata": { + "bytes": 1024, + "compile_from_source": true, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 11:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_2.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". 
/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "throughput": 16.68, + "time_avg": 59.15, + "time_max": 61.39, + "time_min": 56.92, + "tune": true + }, + "metric": "time_avg", + "timestamp": 1600999678.0792882, + "unit": "usec", + "value": 59.15 + }, + { + "metadata": { + "bytes": 1024, + "compile_from_source": true, + "histogram": { + "10.0": 50000, + "12.5": 50000 + }, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 11:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_2.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". /opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "tune": true + }, + "metric": "MPI_Latency_Histogram", + "timestamp": 1600999678.078312, + "unit": "usec", + "value": 0.0 + }, + { + "metadata": { + "bytes": 2048, + "compile_from_source": true, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 11:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_2.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". 
/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "throughput": 29.77, + "time_avg": 66.4, + "time_max": 68.79, + "time_min": 64.01, + "tune": true + }, + "metric": "time_avg", + "timestamp": 1600999678.0830917, + "unit": "usec", + "value": 66.4 + }, + { + "metadata": { + "bytes": 2048, + "compile_from_source": true, + "histogram": { + "50.0": 50000, + "6.0": 50000 + }, + "mpi_args": "mpi-benchmarks/IMB-MPI1 -msglog 11:11 -multi 0 -time 60 -off_cache -1 -iter 100000 -npmin 2 -iter_policy off -zero_size off -show_tail yes -dumpfile /tmp/latency-PingPong-uuid_2.txt -map 16x1 PingPong", + "mpi_benchmark": "Multi-PingPong", + "mpi_env": "FI_LOG_LEVEL=info,FI_PROVIDER=tcp,I_MPI_PIN=1,I_MPI_PIN_PROCESSOR_LIST=0", + "mpi_env_FI_LOG_LEVEL": "info", + "mpi_env_FI_PROVIDER": "tcp", + "mpi_env_I_MPI_PIN": "1", + "mpi_env_I_MPI_PIN_PROCESSOR_LIST": "0", + "mpi_groups": 2, + "mpi_layout": "0=0,1;1=2,3", + "mpi_processes_per_group": 2, + "mpi_ranks": 4, + "mpi_run": ". /opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh; FI_LOG_LEVEL=info FI_PROVIDER=tcp mpirun -tune -genv I_MPI_PIN=1 -genv I_MPI_PIN_PROCESSOR_LIST=0 -n 16 -hosts 10.0.0.2", + "mpi_suite": "IMB-MPI1", + "mpi_vendor": "intel", + "mpi_version": "2019.2-057", + "nodes": "10.0.0.2", + "number_nodes": 1, + "ppn": 0, + "processes_per_host": 16, + "repetitions": 100000, + "smt_enabled": true, + "threads": 16, + "threads_half_cpus": true, + "tune": true + }, + "metric": "MPI_Latency_Histogram", + "timestamp": 1600999678.083103, + "unit": "usec", + "value": 0.0 + } +] diff --git a/tests/linux_benchmarks/mpi_benchmark_test.py b/tests/linux_benchmarks/mpi_benchmark_test.py new file mode 100644 index 0000000000..0c269129b0 --- /dev/null +++ b/tests/linux_benchmarks/mpi_benchmark_test.py @@ -0,0 +1,264 @@ +"""Tests for MPI benchmark.""" + +from typing import List +import unittest +from unittest import mock +import uuid + +from absl import flags +from absl.testing import flagsaver +from absl.testing import parameterized +from perfkitbenchmarker import benchmark_spec +from perfkitbenchmarker import errors +from perfkitbenchmarker import sample +from perfkitbenchmarker import test_util +from perfkitbenchmarker.linux_benchmarks import mpi_benchmark +from perfkitbenchmarker.linux_packages import intelmpi +from perfkitbenchmarker.linux_packages import mpi +from tests import pkb_common_test_case +from tests.linux_packages import mpi_test + + +FLAGS = flags.FLAGS + +# Histogram results from reading MPI output file +histogram1 = {'12.5': 50000, '10.0': 50000} +histogram2 = {'6.0': 50000, '50.0': 50000} +histogram_text = """\ +1024 12.51 +1024 10.01 +""" * 50000 + """\ +2048 6.00 +2048 50.0 +""" * 50000 + +MPI_VARS = '/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh' + + +# All VMs have num_cpus=32 +class Vm(pkb_common_test_case.TestLinuxVirtualMachine): + + def __init__(self, + smt_enabled=True, + ip='10.0.0.2', + robust_remote_command_text=None) -> None: + super(Vm, self).__init__(vm_spec=pkb_common_test_case.CreateTestVmSpec()) + self.internal_ip = ip + self._num_cpus = 32 + # pylint: disable=invalid-name + self.IsSmtEnabled = 
mock.PropertyMock(return_value=smt_enabled) + self.RemoteCommand = mock.PropertyMock( + return_value=('Version 2019 Update 2 Build 2019.2-057', '')) + self.RobustRemoteCommand = mock.PropertyMock( + return_value=((mpi_test.ReadMpiOutput('mpi_pingpong_output.txt'), ''))) + + +def MpiRun(vms) -> List[sample.Sample]: + benchmark_module = mock.Mock(BENCHMARK_NAME='mpi') + benchmark_config = mock.Mock( + vm_groups={}, relational_db=mock.Mock(vm_groups={})) + spec = benchmark_spec.BenchmarkSpec(benchmark_module, benchmark_config, + 'abcdefg') + spec.vms = vms + return mpi_benchmark.Run(spec) + + +class MpiBenchmarkTestCase(pkb_common_test_case.PkbCommonTestCase, + test_util.SamplesTestMixin): + + _METRIC_ERR = 'Metric values should be equal %s != %s' + _VALUE_ERR = 'Values should be equal %s != %s' + _UNIT_ERR = 'Unit values should be equal %s != %s' + # the latency dump file name uses uuid4() + _MOCK_UUIDS = [mock.PropertyMock(hex=f'uuid_{i}') for i in range(12)] + + def setUp(self) -> None: + super(MpiBenchmarkTestCase, self).setUp() + FLAGS.mpi_benchmarks = ['PingPong'] + FLAGS.intelmpi_version = '2019.2-057' + self.mock_histo = self.enter_context( + mock.patch.object(mpi, '_GroupLatencyLines')) + self.mock_histo.return_value = [histogram_text.splitlines()] + self.enter_context( + mock.patch.object(intelmpi, 'MpiVars', return_value=MPI_VARS)) + + @mock.patch.object(uuid, 'uuid4', side_effect=_MOCK_UUIDS) + def testRun(self, mock_uuid) -> None: + FLAGS.mpi_threads = [0] + FLAGS.mpi_env = ['FI_PROVIDER=tcp', 'FI_LOG_LEVEL=info'] + FLAGS.mpi_genv = ['I_MPI_PIN_PROCESSOR_LIST=0', 'I_MPI_PIN=1'] + FLAGS.mpi_npmin = 2 + FLAGS.mpi_tune = True + FLAGS.mpi_multi = True + found = MpiRun([Vm()]) + expected = [] + for row in mpi_test.ReadJson('mpi_tests_samples.json'): + expected.append(sample.Sample(**row)) + expected[-1].metadata['installed_mkl'] = False + self.assertSampleListsEqualUpToTimestamp(expected, found) + self.assertLen(expected, 8) + self.assertEqual(2, self.mock_histo.call_count) + + @parameterized.parameters( + { + 'threads': [0], + 'num_vms': 1, + 'expected_threads': [16] + }, + { + 'threads': [2, 6, 18], + 'num_vms': 2, + 'expected_threads': [4, 12, 36] + }, + { + 'threads': [0], + 'num_vms': 1, + 'expected_threads': [32], + 'smt_enabled': False, # this forces threads=num_cpus + }, + ) + @mock.patch.object(mpi_benchmark, '_RunTest') + def testRunTestCommand(self, + mock_run: mock.Mock, + num_vms: int, + expected_threads: List[int], + threads: List[int], + smt_enabled: bool = True) -> None: + FLAGS.mpi_threads = threads + MpiRun([Vm(smt_enabled) for _ in range(num_vms)]) + for total_processes, found in zip(expected_threads, + mock_run.call_args_list): + _, found_total_processes, found_ppn, _ = found[0] + self.assertEqual(total_processes, found_total_processes) + self.assertEqual(0, found_ppn) + self.assertLen( + mock_run.call_args_list, len(expected_threads), + 'Missing / extra calls in {}'.format(mock_run.call_args_list)) + self.mock_histo.assert_not_called() + + @mock.patch.object(mpi, 'RunMpiStats') + def testRunMpiStatsCall(self, mock_mpistats: mock.Mock) -> None: + tests = ['PingPong', 'AllGather'] + FLAGS.mpi_benchmarks = tests + vms = [Vm(ip='1.2.3.4'), Vm(ip='5.6.7.8')] + total_processes = 32 + ppn = 0 + mpi.RunMpiStats.return_value = mpi.MpiResponse('', '', '', '', [], [], {}) + mpi_benchmark._RunTest(vms, total_processes, ppn, False) + # RunMpiStats called for each one of the --mpi_benchmarks and also for each + # of the msglog values: len(['PingPong','AllGather']) * 
len([10,11]) = 4 + self.assertLen(mock_mpistats.call_args_list, 4) + # just test the last one run which is AllGather with msglog_min=11 + mock_mpistats.assert_called_with( + vms[0], + mpi.MpiRequest( + vms=vms, + total_processes=total_processes, + suite='IMB-MPI1', + tests=[tests[-1]], + ppn=ppn, + msglog_min=11, + msglog_max=11, + timeout=60, + off_cache_size=-1, + off_cache_line_size=None, + iterations=100000, + include_zero_byte=False, + compile_from_source=True, + record_latencies=True, + environment=['I_MPI_DEBUG=6'], + multi=True)) + self.mock_histo.assert_not_called() + + @parameterized.parameters((True, 16), (False, 32)) + def testSmtUsage(self, smt_enabled: bool, num_processes: int) -> None: + FLAGS.mpi_threads = [0] + data = MpiRun([Vm(smt_enabled)]) + self.assertNotEmpty(data) + found = data[0].metadata + self.assertEqual(num_processes, found['processes_per_host']) + self.assertEqual(2, self.mock_histo.call_count) + + def testHistoResults(self) -> None: + FLAGS.mpi_record_latency = True + # Returns with this histogram MpiData with every call to the method + data = MpiRun([Vm(False)]) + self.assertLen(data, 16) + histogram_data = [ + item for item in data if item.metric == 'MPI_Latency_Histogram' + ] + self.assertLen(histogram_data, 8) + meta1 = { + 'bytes': 1024, + 'mpi_groups': 2, + 'mpi_processes_per_group': 2, + 'histogram': histogram1 + } + self.assertDictContainsSubset(meta1, histogram_data[0].metadata) + meta2 = { + 'bytes': 2048, + 'mpi_groups': 2, + 'mpi_processes_per_group': 2, + 'histogram': histogram2 + } + self.assertDictContainsSubset(meta2, histogram_data[1].metadata) + self.assertEqual(4, self.mock_histo.call_count) + + @flagsaver.flagsaver(mpi_benchmarks=['Qubert', 'Broadcast', 'allTOaLL']) + def testGetConfigBadBenchmark(self): + # Alltoall is a valid benchmark + with self.assertRaisesRegex(errors.Setup.InvalidFlagConfigurationError, + '"broadcast,qubert"'): + mpi_benchmark.GetConfig({}) + + @flagsaver.flagsaver(mpi_benchmarks=['Bcast'], mpi_msglog_sizes=[20]) + def testGetConfigNoErrors(self): + # Confirms that no exception is thrown + mpi_benchmark.GetConfig({}) + + @flagsaver.flagsaver(mpi_msglog_sizes=[20]) + def testGetConfigBadMessageSizeFlags(self): + # Need to do .parse() so that FLAGS['mpi_msglog_min'].present resolves + FLAGS['mpi_msglog_min'].parse(10) + with self.assertRaises(errors.Setup.InvalidFlagConfigurationError): + mpi_benchmark.GetConfig({}) + + @flagsaver.flagsaver(mpi_suites=['IMB-MT']) + def testRunTestWithSuites(self): + FLAGS.mpi_benchmarks = [] + # Mock response with no results as not testing that functionality + response = mpi.MpiResponse('a', 'b', 'c', 'd', [], [], {}) + mpirun_mock = self.enter_context( + mock.patch.object(mpi, 'RunMpiStats', return_value=response)) + vm = Vm() + + mpi_benchmark._RunTest([vm], 2, 1, True) + + expected_request = mpi.MpiRequest( + vms=[vm], + total_processes=2, + suite='IMB-MT', + tests=['UniBandMT'], + ppn=1, + msglog_min=11, + msglog_max=11, + timeout=60, + off_cache_size=-1, + off_cache_line_size=None, + iterations=100000, + include_zero_byte=False, + compile_from_source=True, + environment=['I_MPI_DEBUG=6'], + global_environment=[], + record_latencies=True, + npmin=None, + tune=False, + multi=True) + # Test the last one called + mpirun_mock.assert_called_with(vm, expected_request) + # It was called len(IMB-MT suite tests) times + self.assertLen(mpirun_mock.call_args_list, 20) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/linux_packages/imb_test.py 
b/tests/linux_packages/imb_test.py new file mode 100644 index 0000000000..bba2e38021 --- /dev/null +++ b/tests/linux_packages/imb_test.py @@ -0,0 +1,182 @@ +"""Tests for Intel MPI benchmark.""" + +import unittest +from unittest import mock + +from absl.testing import flagsaver +from absl.testing import parameterized +from perfkitbenchmarker import os_types +from perfkitbenchmarker.linux_packages import imb +from perfkitbenchmarker.linux_packages import intelmpi +# Required for --mpi_vendor flag. +from perfkitbenchmarker.linux_packages import mpi # pylint: disable=unused-import + +from tests import pkb_common_test_case + + +def MockVm(): + return mock.Mock( + internal_ip='1.2.3.4', NumCpusForBenchmark=8, BASE_OS_TYPE=os_types.RHEL) + + +class IntelMpiLibTestCase(pkb_common_test_case.PkbCommonTestCase): + + MPIVARS_FILE = ('/opt/intel/compilers_and_libraries/' + 'linux/mpi/intel64/bin/mpivars.sh') + + COMPILE_2019 = ('cd mpi-benchmarks; ' + '. /opt/intel/mkl/bin/mklvars.sh intel64; ' + '. /opt/intel/compilers_and_libraries/' + 'linux/bin/compilervars.sh intel64; ' + 'CC=mpicc CXX=mpicxx make') + COMPILE_2021 = ('cd mpi-benchmarks; ' + '. /opt/intel/oneapi/setvars.sh; ' + 'CC=mpicc CXX=mpicxx make') + + def setUp(self): + super().setUp() + self.enter_context(flagsaver.flagsaver(mpi_vendor='intel')) + + def MockVmWithReturnValues(self): + # for use when calling intelmpi.py commands to find mpivars, MPI version + vm = MockVm() + vm_returns = [ + self.MPIVARS_FILE, + ('Intel(R) MPI Library for Linux* OS, ' + 'Version 2018 Update 4 Build 20180823 (id: 18555)') + ] + vm.RemoteCommand.side_effect = [(txt, '') for txt in vm_returns] + return vm + + def testInstallCompileSource(self) -> None: + vm = MockVm() + imb.Install(vm) + # TODO(user) taken out due to not installing MKL + # vm.InstallPackages.assert_called_with('intel-mkl-2020.1-102') + # just confirm that the git clone and patch were done + cmd = ';'.join([cmd[0][0] for cmd in vm.RemoteCommand.call_args_list]) + self.assertRegex( + cmd, 'git clone -n https://github.com/intel/mpi-benchmarks.git', + 'Missing git clone command') + self.assertRegex(cmd, 'patch -d mpi-benchmarks -p3 < ~/intelmpi.patch', + 'Missing patch command') + + def testMpirunMpiVersion(self): + vm = self.MockVmWithReturnValues() + + mpi_version = intelmpi.MpirunMpiVersion(vm) + + self.assertEqual('2018.4', mpi_version) + vm.RemoteCommand.assert_called_with(f'. {self.MPIVARS_FILE}; mpirun -V') + + def testMpirunMpiVersionError(self): + vm = MockVm() + vm.RemoteCommand.return_value = 'Non parsable text', '' + + with self.assertRaises(ValueError): + intelmpi.MpirunMpiVersion(vm) + + @parameterized.parameters((2, ' -ppn 1'), (4, '')) + def testPpn(self, total_processes, expected_suffix): + vm = self.MockVmWithReturnValues() + hosts = ['10.0.0.1', '10.0.0.2'] + + mpirun = imb.MpiRunCommand(vm, hosts, total_processes, 0, [], [], False) + + # '-ppn 1' is only seen when running single threaded tests + expected_mpirun = (f'mpirun -n {total_processes} -hosts 10.0.0.1,10.0.0.2' + f'{expected_suffix}') + self.assertEqual(f'. 
{self.MPIVARS_FILE}; {expected_mpirun}', mpirun) + + @parameterized.parameters( + ('2019.6', COMPILE_2019, []), + ('2021.2', COMPILE_2021, + ['intel-oneapi-compiler-dpcpp-cpp', 'intel-oneapi-mpi-devel'])) + def testInstall2021(self, intelmpi_version, expected_compile_cmd, + installed_packages): + vm = MockVm() + with flagsaver.flagsaver(intelmpi_version=intelmpi_version): + imb.Install(vm) + vm.RemoteCommand.assert_any_call(expected_compile_cmd) + vm.InstallPackages.assert_has_calls( + [mock.call(pkb) for pkb in installed_packages]) + + +class OpenMpiLibTestCase(pkb_common_test_case.PkbCommonTestCase): + + def setUp(self): + super().setUp() + self.enter_context(flagsaver.flagsaver(mpi_vendor='openmpi')) + + def testInstallCompileSource(self) -> None: + vm = MockVm() + imb.Install(vm) + cmd = ';'.join([cmd[0][0] for cmd in vm.RemoteCommand.call_args_list]) + self.assertRegex( + cmd, 'git clone -n https://github.com/intel/mpi-benchmarks.git', + 'Missing git clone command') + self.assertRegex(cmd, 'patch -d mpi-benchmarks -p3 < ~/intelmpi.patch', + 'Missing patch command') + + @flagsaver.flagsaver(imb_compile_from_source=False) + def testInstallWithoutImbCompileFromSourceThrows(self) -> None: + vm = MockVm() + with self.assertRaises(ValueError) as e: + imb.Install(vm) + self.assertEqual( + str(e.exception), + '--mpi_vendor=openmpi requires --imb_compile_from_source') + + def testMpiRunCommandEnvVarsExported(self): + vm = MockVm() + total_proc = 2 + ppn = 1 + hosts = ['10.0.0.1', '10.0.0.2'] + environment = [ + 'OMPI_MCA_btl=self,tcp', + 'OMPI_MCA_rmaps_base_mapping_policy=node:PE=1', + ] + + mpirun = imb.MpiRunCommand(vm, hosts, total_proc, ppn, environment, [], + False) + + expected_mpirun = ( + 'OMPI_MCA_btl=self,tcp OMPI_MCA_rmaps_base_mapping_policy=node:PE=1 ' + 'mpirun -x OMPI_MCA_btl -x OMPI_MCA_rmaps_base_mapping_policy ' + '-report-bindings -display-map -n 2 -npernode 1 --use-hwthread-cpus ' + '-host 10.0.0.1:slots=2,10.0.0.2:slots=2') + self.assertEqual(expected_mpirun, mpirun) + + def testMpiRunCommandNoEnvVarsIsFormattedCorrectly(self): + vm = MockVm() + total_proc = 2 + ppn = 1 + hosts = ['10.0.0.1', '10.0.0.2'] + environment = [] + + mpirun = imb.MpiRunCommand(vm, hosts, total_proc, ppn, environment, [], + False) + + expected_mpirun = ( + 'mpirun -report-bindings -display-map -n 2 -npernode 1 ' + '--use-hwthread-cpus -host 10.0.0.1:slots=2,10.0.0.2:slots=2') + self.assertEqual(expected_mpirun, mpirun) + + def testMpiRunCommandNoPpnSpecified(self): + vm = MockVm() + total_proc = 8 + ppn = 0 + hosts = ['10.0.0.1', '10.0.0.2', '10.0.0.3', '10.0.0.4'] + environment = [] + + mpirun = imb.MpiRunCommand(vm, hosts, total_proc, ppn, environment, [], + False) + expected_mpirun = ( + 'mpirun -report-bindings -display-map -n 8 -npernode 2 ' + '--use-hwthread-cpus -host ' + '10.0.0.1:slots=8,10.0.0.2:slots=8,10.0.0.3:slots=8,10.0.0.4:slots=8') + self.assertEqual(expected_mpirun, mpirun) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/linux_packages/mpi_test.py b/tests/linux_packages/mpi_test.py new file mode 100644 index 0000000000..87090e693d --- /dev/null +++ b/tests/linux_packages/mpi_test.py @@ -0,0 +1,266 @@ +"""Tests for MPI benchmark.""" + +import json +import os +from typing import Any, Dict, List, Union +import unittest +from unittest import mock +import uuid +from absl import flags +from absl.testing import parameterized +from perfkitbenchmarker import errors +from perfkitbenchmarker.linux_packages import intelmpi +from perfkitbenchmarker.linux_packages import 
mpi +from perfkitbenchmarker.linux_packages import omb +from tests import pkb_common_test_case + +FLAGS = flags.FLAGS + +_TEST_DIR = os.path.join(os.path.dirname(__file__), '..', 'data', 'mpi') +MPI_VARS = '/opt/intel/compilers_and_libraries/linux/mpi/intel64/bin/mpivars.sh' +# all "mpirun" commands start with this +RUN_PREFIX = f'. {MPI_VARS};' + + +def FilePath(file_name: str) -> str: + return os.path.join(_TEST_DIR, file_name) + + +def ReadMpiOutput(file_name: str) -> str: + with open(FilePath(file_name)) as reader: + return reader.read() + + +def ReadJson(file_name: str) -> Union[Dict[str, Any], List[Dict[str, Any]]]: + with open(FilePath(file_name)) as reader: + return json.load(reader) + + +def _CreateMpiDataFromDict(data: Dict[str, Any]) -> mpi.MpiData: + if 'error' in data: + return mpi.MpiData(is_error=True, bytes=data['bytes']) + else: + number_bytes = data.pop('bytes', None) + repetitions = data.pop('repetitions', None) + return mpi.MpiData( + bytes=number_bytes, repetitions=repetitions, data=data['data']) + + +def _CreateMpiResultsFromDict(result_json: Dict[str, Any]) -> mpi.MpiResult: + mpi_datas = [ + _CreateMpiDataFromDict(mpi_data) for mpi_data in result_json['data'] + ] + result_json['data'] = mpi_datas + if result_json.get('group_layout'): + # Convert json-serialized group number from string to an int + result_json['group_layout'] = { + int(key): value for key, value in result_json['group_layout'].items() + } + return mpi.MpiResult(**result_json) + + +def _CreateMpiResponseFromDict( + data: List[Dict[str, Any]]) -> List[mpi.MpiResult]: + return [_CreateMpiResultsFromDict(result) for result in data] + + +def ReadParsedOutput(file_name: str) -> List[mpi.MpiResult]: + return _CreateMpiResponseFromDict(ReadJson(file_name)) + + +def _MockVm(ip: str) -> mock.Mock: + vm = mock.Mock(internal_ip=ip) + vm.NumCpusForBenchmark.return_value = 8 + return vm + + +class MpiTestCase(pkb_common_test_case.PkbCommonTestCase): + MPI_VERSION = '2019.2-057' + # Lines from the -dumpfile latency file. Format is (bytes, latency usec). + LATENCY_DATA_FILE = ( + '0 1.123', + '0 2.9999', + '0 42.3', + '1024 2.0', + '1024 3.0', + '1024 3.0', + ) + + # The latency_data_file summarized in a Dict. + LATENCY_DATA: Dict[int, Dict[float, int]] = { + 0: { + 1.12: 1, + 3.0: 1, + 42.0: 1 + }, + 1024: { + 2.0: 1, + 3.0: 2 + } + } + + def setUp(self): + super(MpiTestCase, self).setUp() + FLAGS.intelmpi_version = self.MPI_VERSION + self.enter_context( + mock.patch.object(intelmpi, 'MpiVars', return_value=MPI_VARS)) + + @parameterized.parameters( + # mpirun -n 120 -hosts a,b,c,d -ppn 1 mpi-benchmarks/.... 
+ ('mpi_allgather_output.txt', 'mpi_allgather_parsed.json'), + ('mpi_barrier_output.txt', 'mpi_barrier_parsed.json'), + ('mpi_pingpong_output.txt', 'mpi_pingpong_parsed.json'), + ('mpi_reduce_output.txt', 'mpi_reduce_parsed.json'), + ('mpi_latencies_output.txt', 'mpi_latencies_parsed.json'), + ('mpi_one_put_all_output.txt', 'mpi_one_put_all_parsed.json'), + ) + def testParseMpiOutput(self, mpi_output_file: str, + mpi_parsed_file: str) -> None: + found = list( + mpi.MpiResultParser(ReadMpiOutput(mpi_output_file).splitlines())) + expected = ReadParsedOutput(mpi_parsed_file) + self.assertEqual(expected, found) + + def testVerifyInstall(self) -> None: + vms = [_MockVm(ip) for ip in ('a', 'b')] + vms[0].RobustRemoteCommand.return_value = '', '' + mpi.VerifyInstall(vms) + mpirun_cmd = ('mpirun -n 8 -hosts a,b -ppn 8 mpi-benchmarks/IMB-MPI1 ' + '-msglog 10:11 -multi 0 -time 20 -off_cache -1 -iter 100 ' + '-iter_policy off -zero_size off -show_tail yes PingPong') + vms[0].RobustRemoteCommand.assert_called_with(RUN_PREFIX + ' ' + mpirun_cmd) + + def _CreateMpiRequest(self, + record_latencies: bool, + iterations: int = 100000) -> mpi.MpiRequest: + return mpi.MpiRequest( + vms=[_MockVm('a'), _MockVm('b')], + total_processes=10, + ppn=0, + suite='IMB-MPI1', + tests=['PingPong'], + msglog_min=10, + msglog_max=11, + timeout=20, + off_cache_size=-1, + off_cache_line_size=None, + iterations=iterations, + include_zero_byte=False, + compile_from_source=True, + record_latencies=record_latencies, + multi=True) + + def testRunMpiStats(self) -> None: + vm = _MockVm('a') + vm.RobustRemoteCommand.return_value = ReadMpiOutput( + 'mpi_pingpong_output.txt'), '' + request = self._CreateMpiRequest(False) + response = mpi.RunMpiStats(vm, request) + self.assertEqual(RUN_PREFIX + ' mpirun -n 10 -hosts a,b', response.mpi_run) + self.assertEqual('intel', response.vendor) + self.assertEqual('2019.2-057', response.version) + # fully tested in testParseFiles + self.assertLen(response.results, 1) + expected_args = ('mpi-benchmarks/IMB-MPI1 -msglog 10:11 -multi 0 -time 20 ' + '-off_cache -1 -iter 100000 -iter_policy off ' + '-zero_size off -show_tail yes -map 5x2 PingPong') + self.assertEqual(expected_args, response.args) + + @mock.patch.object(mpi, '_GroupLatencyLines') + @mock.patch.object(uuid, 'uuid4', side_effect=[mock.PropertyMock(hex='abc')]) + def testRunMpiStatsLatencyFile(self, mock_uuid: mock.Mock, + mock_create_histo: mock.Mock) -> None: + mock_create_histo.return_value = [[ + '1024 10.0', '1024 11.0', '2048 11.10', '2048 11.11' + ]] + vm = _MockVm('a') + vm.RobustRemoteCommand.return_value = ( + ReadMpiOutput('mpi_barrier_output.txt'), '') + request = self._CreateMpiRequest(True, 2) + response = mpi.RunMpiStats(vm, request) + # has the -show_tail and -dumpfile flags set + expected_args_re = (r'.*-zero_size off -show_tail yes ' + r'-dumpfile /tmp/latency\S+ -map 5x2 PingPong$') + self.assertRegex(response.args, expected_args_re) + mock_create_histo.assert_called_with(vm, '/tmp/latency-PingPong-abc.txt', 2) + + @mock.patch('builtins.open', + mock.mock_open(read_data='\n'.join(LATENCY_DATA_FILE))) + def testGroupLatencyLines(self): + vm = mock.Mock() + vm.TryRemoteCommand.return_value = True + expected_group1 = ['0 1.123', '0 2.9999', '0 42.3'] + expected_group2 = ['1024 2.0', '1024 3.0', '1024 3.0'] + lines = mpi._GroupLatencyLines(vm, '/tmp/remote.txt', 3) + self.assertEqual([expected_group1, expected_group2], lines) + vm.TryRemoteCommand.assert_called_with('test -f /tmp/remote.txt') + + def 
testGroupLatencyLinesMissingFile(self): + # method returns an empty list if check for remote latency file fails + vm = mock.Mock() + vm.TryRemoteCommand.return_value = False + lines = mpi._GroupLatencyLines(vm, '/tmp/remote.txt', 3) + self.assertEmpty(lines) + + def testCreateMpiDataForHistogram(self) -> None: + FLAGS.run_uri = '12345678' + grouped_lines = [['1024 10.0', '1024 11.0', '2048 11.10', '2048 11.11']] + mpi_data1 = mpi.MpiData( + bytes=1024, repetitions=2, data={'p50': 10.5}, is_error=False) + mpi_data2 = mpi.MpiData( + bytes=2048, repetitions=2, data={'p50': 11.0}, is_error=False) + parsed_results = [ + mpi.MpiResult(benchmark='PingPong', data=[mpi_data1, mpi_data2]) + ] + self.assertIsNone(parsed_results[0].data[0].histogram) + self.assertIsNone(parsed_results[0].data[1].histogram) + mpi._CreateMpiDataForHistogram(grouped_lines, parsed_results) + # number of results did not change -- added "histogram=" entry to it + self.assertLen(parsed_results, 1) + self.assertEqual({10.0: 1, 11.0: 1}, parsed_results[0].data[0].histogram) + self.assertEqual({11.1: 2}, parsed_results[0].data[1].histogram) + + def testCreateMpiDataForHistogramNoParsedResults(self) -> None: + # No parsed results -> no histograms are parsed + FLAGS.run_uri = '12345678' + grouped_lines = [['1024 10.0', '1024 11.0', '2048 11.10', '2048 11.11']] + parsed_results = [] + self.assertLen(parsed_results, 0) + mpi._CreateMpiDataForHistogram(grouped_lines, parsed_results) + self.assertLen(parsed_results, 0) + + def testRunMpiStatsWithException(self) -> None: + request = self._CreateMpiRequest(False) + vm = request.vms[0] + vm.RobustRemoteCommand.side_effect = [ + errors.VirtualMachine.RemoteCommandError + ] + with self.assertRaises(errors.VirtualMachine.RemoteCommandError): + mpi.RunMpiStats(vm, request) + # pytyping thinks that vm.RemoteCommand is a Callable but it is a Mock + last_command = vm.RemoteCommand.call_args[0][0] # pytype: disable=attribute-error + self.assertRegex(last_command, 'tail.*/var/log/') + vm.RemoteCommand.assert_called_once() # pytype: disable=attribute-error + + def testParseMpiPinning(self): + lines = ReadMpiOutput('mpi_debug_output.txt').splitlines() + # nodes 0 and 1 had the same MPI pinning groups of 0,1,2,3 CPUids + expected_pinning = ['0:0:0,1,2,3', '1:1:0,1,2,3'] + + self.assertEqual(expected_pinning, omb.ParseMpiPinning(lines)) + + def testParseMpiEnv(self): + lines = ReadMpiOutput('mpi_debug_output.txt').splitlines() + expected_mpi_env = { + 'I_MPI_DEBUG': '5', + 'I_MPI_HYDRA_TOPOLIB': 'hwloc', + 'I_MPI_INTERNAL_MEM_POLICY': 'default', + 'I_MPI_MPIRUN': 'mpirun', + 'I_MPI_ROOT': '/opt/intel/compilers_and_libraries_2020.0.166/linux/mpi' + } + + self.assertEqual(expected_mpi_env, mpi.ParseMpiEnv(lines)) + + +if __name__ == '__main__': + unittest.main()
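
A minimal usage sketch, not part of the diff above and purely illustrative: it assumes the tests/data/mpi fixtures and the perfkitbenchmarker.linux_packages.mpi parser that the tests exercise, and shows how an IMB output file like mpi_pingpong_output.txt is turned into the per-benchmark time_avg values that the benchmark reports. The fixture path and the printed fields are assumptions taken from the fixtures in this change; run from a checkout where the package is importable.

# Illustrative sketch only; field names (benchmark, bytes, repetitions,
# time_avg, is_error) mirror the parsed-JSON fixtures added in this change.
import os

from perfkitbenchmarker.linux_packages import mpi

# Hypothetical path to one of the fixtures added above.
FIXTURE = os.path.join('tests', 'data', 'mpi', 'mpi_pingpong_output.txt')


def main() -> None:
  with open(FIXTURE) as reader:
    lines = reader.read().splitlines()
  # MpiResultParser is iterated in testParseMpiOutput above; it yields one
  # MpiResult per "Benchmarking <name>" block in the IMB output.
  for result in mpi.MpiResultParser(lines):
    for mpi_data in result.data:
      if mpi_data.is_error:
        continue  # error rows carry no timing data
      print(f'{result.benchmark} bytes={mpi_data.bytes} '
            f'reps={mpi_data.repetitions} '
            f"t_avg={mpi_data.data.get('time_avg')} usec")


if __name__ == '__main__':
  main()

For the pingpong fixture this would print one line per message size (1024 and 2048 bytes), matching the time_avg values in mpi_pingpong_parsed.json.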