From d652a19094be033c1dab722c75a727fa68c1aa0c Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 11:34:09 -0600 Subject: [PATCH 01/22] flitelf: remove srcfile from extract_symbols() It was generally not needed. Instead FLiT Bisect needed it in each symbol tuple object. Instead, just have the Bisect wrapper function create a new tuple object containing this source file name. --- scripts/flitcli/flit_bisect.py | 14 +++++++++++++- scripts/flitcli/flitelf.py | 12 +++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/scripts/flitcli/flit_bisect.py b/scripts/flitcli/flit_bisect.py index 02c6a4dc..965f0ea5 100644 --- a/scripts/flitcli/flit_bisect.py +++ b/scripts/flitcli/flit_bisect.py @@ -85,6 +85,7 @@ files that cause the variability. ''' +from collections import namedtuple from tempfile import NamedTemporaryFile import argparse import csv @@ -694,6 +695,8 @@ def is_result_differing(resultfile): return float(get_comparison_result(resultfile)) != 0.0 _extract_symbols_memos = {} +BisectSymbolTuple = namedtuple('BisectSymbolTuple', + 'src, symbol, demangled, fname, lineno') def extract_symbols(file_or_filelist, objdir): ''' Extracts symbols for the given source file(s). 
The corresponding object is @@ -728,7 +731,16 @@ def extract_symbols(file_or_filelist, objdir): if fobj in _extract_symbols_memos: return _extract_symbols_memos[fobj] - _extract_symbols_memos[fobj] = elf.extract_symbols(fobj, fname) + func_symbols, remaining_symbols = elf.extract_symbols(fobj) + func_symbols = [ + BisectSymbolTuple(fname, sym.symbol, sym.demangled, sym.fname, + sym.lineno) + for sym in func_symbols] + remaining_symbols = [ + BisectSymbolTuple(fname, sym.symbol, sym.demangled, sym.fname, + sym.lineno) + for sym in remaining_symbols] + _extract_symbols_memos[fobj] = (func_symbols, remaining_symbols) return _extract_symbols_memos[fobj] def memoize_strlist_func(func): diff --git a/scripts/flitcli/flitelf.py b/scripts/flitcli/flitelf.py index 5d04eaac..c2866fd9 100644 --- a/scripts/flitcli/flitelf.py +++ b/scripts/flitcli/flitelf.py @@ -96,19 +96,17 @@ from elftools.elf.sections import SymbolTableSection SymbolTuple = namedtuple('SymbolTuple', - 'src, symbol, demangled, fname, lineno') + 'symbol, demangled, fname, lineno') SymbolTuple.__doc__ = ''' Tuple containing information about the symbols in a file. Has the following attributes: - src: source file that was compiled symbol: mangled symbol in the compiled version demangled: demangled version of symbol - fname: filename where the symbol is actually defined. This usually - will be equal to src, but may not be in some situations. + fname: filename where the symbol is defined. lineno: line number of definition within fname. ''' -def extract_symbols(objfile, srcfile): +def extract_symbols(objfile): ''' Extracts symbols for the given object file. 
@@ -147,10 +145,10 @@ def extract_symbols(objfile, srcfile): fdemangled = _demangle([sym.name for sym in fsyms]) rdemangled = _demangle([sym.name for sym in rsyms]) - funcsym_tuples = [SymbolTuple(srcfile, fsyms[i].name, fdemangled[i], + funcsym_tuples = [SymbolTuple(fsyms[i].name, fdemangled[i], locs[i][0], locs[i][1]) for i in range(len(fsyms))] - remaining_tuples = [SymbolTuple(srcfile, rsyms[i].name, rdemangled[i], + remaining_tuples = [SymbolTuple(rsyms[i].name, rdemangled[i], None, None) for i in range(len(rsyms))] From e116f9c0b16581a090a3e0030449f765ceef5139 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 11:35:04 -0600 Subject: [PATCH 02/22] flitelf: make filepath for symbols all absolute paths --- scripts/flitcli/flitelf.py | 19 +++++++++++++++++++ tests/flit_cli/flit_bisect/tst_bisect.py | 18 +++++++++--------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/scripts/flitcli/flitelf.py b/scripts/flitcli/flitelf.py index c2866fd9..dddac5eb 100644 --- a/scripts/flitcli/flitelf.py +++ b/scripts/flitcli/flitelf.py @@ -243,6 +243,7 @@ def _gen_file_line_table(dwarfinfo): # generate the table table = [] for unit in dwarfinfo.iter_CUs(): + compile_dir = _get_compile_dir(unit) lineprog = dwarfinfo.line_program_for_CU(unit) prevstate = None for entry in lineprog.get_entries(): @@ -256,6 +257,8 @@ def _gen_file_line_table(dwarfinfo): dirno = lineprog['file_entry'][prevstate.file - 1].dir_index filepath = os.path.join( lineprog['include_directory'][dirno - 1], filename) + if compile_dir and not os.path.isabs(filepath): # make absolute + filepath = os.path.join(compile_dir, filepath) line = prevstate.line fromaddr = prevstate.address toaddr = max(fromaddr, entry.state.address) @@ -278,3 +281,19 @@ def _gen_file_line_table(dwarfinfo): consolidated.append(prev) return consolidated + +def _get_compile_dir(compile_unit): + ''' + Returns the directory where the compile unit was compiled from. 
+ + @param compile_unit: A CU from a dwarfinfo.iterCUs() + + @return (bytes) the DW_AT_comp_dir attribute for the given compile unit + or None if this attribute is missing + ''' + die = next(compile_unit.iter_DIEs()) # first DIE + key = 'DW_AT_comp_dir' + if key in die.attributes: + return die.attributes['DW_AT_comp_dir'].value + else: + return None diff --git a/tests/flit_cli/flit_bisect/tst_bisect.py b/tests/flit_cli/flit_bisect/tst_bisect.py index 723001f5..500dd448 100644 --- a/tests/flit_cli/flit_bisect/tst_bisect.py +++ b/tests/flit_cli/flit_bisect/tst_bisect.py @@ -240,15 +240,15 @@ Test the All differing symbols section of the output >>> idx = bisect_out.index('All variability inducing symbols:') >>> print('\\n'.join(bisect_out[idx+1:])) # doctest:+ELLIPSIS - tests/BisectTest.cpp:96 ... -- real_problem_test(int, char**) (score 50.0) - tests/file4.cxx:110 ... -- file4_all() (score 30.0) - tests/file2.cpp:90 ... -- file2_func1_PROBLEM() (score 7.0) - tests/file1.cpp:92 ... -- file1_func2_PROBLEM() (score 5.0) - tests/file1.cpp:108 ... -- file1_func4_PROBLEM() (score 3.0) - tests/file3.cpp:103 ... -- file3_func5_PROBLEM() (score 3.0) - tests/A.cpp:95 ... -- A::fileA_method1_PROBLEM() (score 2.0) - tests/file1.cpp:100 ... -- file1_func3_PROBLEM() (score 2.0) - tests/file3.cpp:92 ... -- file3_func2_PROBLEM() (score 1.0) + /.../tests/BisectTest.cpp:96 ... -- real_problem_test(int, char**) (score 50.0) + /.../tests/file4.cxx:110 ... -- file4_all() (score 30.0) + /.../tests/file2.cpp:90 ... -- file2_func1_PROBLEM() (score 7.0) + /.../tests/file1.cpp:92 ... -- file1_func2_PROBLEM() (score 5.0) + /.../tests/file1.cpp:108 ... -- file1_func4_PROBLEM() (score 3.0) + /.../tests/file3.cpp:103 ... -- file3_func5_PROBLEM() (score 3.0) + /.../tests/A.cpp:95 ... -- A::fileA_method1_PROBLEM() (score 2.0) + /.../tests/file1.cpp:100 ... -- file1_func3_PROBLEM() (score 2.0) + /.../tests/file3.cpp:92 ... 
-- file3_func2_PROBLEM() (score 1.0) Test that the --compiler-type flag value made it into the bisect Makefile >>> troublecxx From 86774e8164bc4876fbc3e095be7e4528d5fcb09d Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 11:44:09 -0600 Subject: [PATCH 03/22] Add flitelf_nm.py as a backup for flitelf.py This backup acts as a replacement for flitelf.py that simply uses nm and c++filt from binutils. Perhaps surprisingly, this simpler alternative requires fewer dependencies and is faster. --- scripts/flitcli/flit_bisect.py | 7 +- scripts/flitcli/flitelf_nm.py | 179 +++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 2 deletions(-) create mode 100644 scripts/flitcli/flitelf_nm.py diff --git a/scripts/flitcli/flit_bisect.py b/scripts/flitcli/flit_bisect.py index 965f0ea5..ffb944f1 100644 --- a/scripts/flitcli/flit_bisect.py +++ b/scripts/flitcli/flit_bisect.py @@ -107,7 +107,10 @@ try: import flitelf as elf except ImportError: - elf = None + try: + import flitelf_nm as elf + except ImportError: + elf = None brief_description = 'Bisect compilation to identify problematic source code' @@ -2222,7 +2225,7 @@ def main(arguments, prog=None): ''' if elf is None: - print('Error: pyelftools is not installed, bisect disabled', + print('Error: pyelftools or binutils is not installed, bisect disabled', file=sys.stderr) return 1 diff --git a/scripts/flitcli/flitelf_nm.py b/scripts/flitcli/flitelf_nm.py new file mode 100644 index 00000000..5f085a67 --- /dev/null +++ b/scripts/flitcli/flitelf_nm.py @@ -0,0 +1,179 @@ +# -- LICENSE BEGIN -- +# +# Copyright (c) 2015-2020, Lawrence Livermore National Security, LLC. +# +# Produced at the Lawrence Livermore National Laboratory +# +# Written by +# Michael Bentley (mikebentley15@gmail.com), +# Geof Sawaya (fredricflinstone@gmail.com), +# and Ian Briggs (ian.briggs@utah.edu) +# under the direction of +# Ganesh Gopalakrishnan +# and Dong H. Ahn. +# +# LLNL-CODE-743137 +# +# All rights reserved. 
+# +# This file is part of FLiT. For details, see +# https://pruners.github.io/flit +# Please also read +# https://github.com/PRUNERS/FLiT/blob/master/LICENSE +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the disclaimer below. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the disclaimer +# (as noted below) in the documentation and/or other materials +# provided with the distribution. +# +# - Neither the name of the LLNS/LLNL nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL +# SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# +# Additional BSD Notice +# +# 1. This notice is required to be provided under our contract +# with the U.S. Department of Energy (DOE). This work was +# produced at Lawrence Livermore National Laboratory under +# Contract No. DE-AC52-07NA27344 with the DOE. +# +# 2. 
Neither the United States Government nor Lawrence Livermore +# National Security, LLC nor any of their employees, makes any +# warranty, express or implied, or assumes any liability or +# responsibility for the accuracy, completeness, or usefulness of +# any information, apparatus, product, or process disclosed, or +# represents that its use would not infringe privately-owned +# rights. +# +# 3. Also, reference herein to any specific commercial products, +# process, or services by trade name, trademark, manufacturer or +# otherwise does not necessarily constitute or imply its +# endorsement, recommendation, or favoring by the United States +# Government or Lawrence Livermore National Security, LLC. The +# views and opinions of authors expressed herein do not +# necessarily state or reflect those of the United States +# Government or Lawrence Livermore National Security, LLC, and +# shall not be used for advertising or product endorsement +# purposes. +# +# -- LICENSE END -- + +''' +Utility functions for dealing with ELF binary files. This file uses +alternative methods to do this functionality that does not require the +pyelftools package. But this file gives the same public interface as +pyelftools so that it can be used as a replacement. + +Instead, this package uses binutils through subprocesses. The programs used +are "nm" and "c++filt" to perform the same functionality. +''' + +from collections import namedtuple +import subprocess as subp +import os +import shutil + +if not shutil.which('nm') or not shutil.which('c++filt'): + raise ImportError('Cannot find binaries "nm" and "c++filt"') + +SymbolTuple = namedtuple('SymbolTuple', + 'symbol, demangled, fname, lineno') +SymbolTuple.__doc__ = ''' +Tuple containing information about the symbols in a file. Has the following +attributes: + symbol: mangled symbol in the compiled version + demangled: demangled version of symbol + fname: filename where the symbol is defined. 
+ lineno: line number of definition within fname. +''' + +def extract_symbols(objfile_or_list): + ''' + Extracts symbols for the given object file. + + @param objfile_or_list: (str or list(str)) path to object file(s) + + @return two lists of SymbolTuple objects (funcsyms, remaining). + The first is the list of exported functions that are strong symbols and + have a filename and line number where they are defined. The second is + all remaining symbols that are strong, exported, and defined. + ''' + funcsym_tuples = [] + remaining_tuples = [] + nm_args = [ + 'nm', + '--print-file-name', + '--extern-only', + '--defined-only', + '--line-numbers', + ] + if isinstance(objfile_or_list, str): + nm_args.append(objfile_or_list) + else: + nm_args.extend(objfile_or_list) + symbol_strings = subp.check_output(nm_args).decode('utf-8').splitlines() + + symbols = [] + filenames = [] + linenumbers = [] + for symbol_string in symbol_strings: + try: + stype, symbol_plus_extra = symbol_string.split(maxsplit=2)[1:] + except: + import pdb; pdb.set_trace() + if stype.lower() == 'w': + symbol = symbol_plus_extra.split()[0] + filename = None + linenumber = None + elif '\t' in symbol_plus_extra: + symbol, definition = symbol_plus_extra.split('\t', maxsplit=1) + filename, linenumber= definition.split(':') + linenumber = int(linenumber) + else: + symbol = symbol_plus_extra + filename = None + linenumber = None + symbols.append(symbol) + filenames.append(filename) + linenumbers.append(linenumber) + + demangled = _demangle(symbols) + + for sym, dem, fnam, line in zip(symbols, demangled, filenames, linenumbers): + symbol_tuple = SymbolTuple(sym, dem, fnam, line) + if fnam: + funcsym_tuples.append(symbol_tuple) + else: + remaining_tuples.append(symbol_tuple) + + return funcsym_tuples, remaining_tuples + +def _demangle(symbol_list): + 'Demangles each C++ name in the given list' + proc = subp.Popen(['c++filt'], stdin=subp.PIPE, stdout=subp.PIPE) + out, _ = 
proc.communicate('\n'.join(symbol_list).encode()) + demangled = out.decode('utf8').splitlines() + assert len(demangled) == len(symbol_list) + return demangled From 351a1e3f5ca930d57ef522c809d502c700e35996 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 11:47:25 -0600 Subject: [PATCH 04/22] bisect: remove disable of bisect tests when pyelftools is not installed --- tests/flit_cli/flit_bisect/Makefile | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/flit_cli/flit_bisect/Makefile b/tests/flit_cli/flit_bisect/Makefile index dccbc157..4ce0ee57 100644 --- a/tests/flit_cli/flit_bisect/Makefile +++ b/tests/flit_cli/flit_bisect/Makefile @@ -2,10 +2,6 @@ RUNNER := python3 SRC := $(wildcard tst_*.py) RUN_TARGETS := $(SRC:%.py=run_%) -IS_PYELF := $(shell if python3 -c 'import elftools' 2>/dev/null; then \ - echo true; \ - fi) - include ../../color_out.mk ifndef VERBOSE @@ -13,13 +9,7 @@ ifndef VERBOSE endif .PHONY: check help clean build run_% -ifeq ($(IS_PYELF),true) check: $(TARGETS) $(RUN_TARGETS) -else -check: - @$(call color_out,RED,Warning: pyelftools is not found on your system;\ - skipping bisect tests) -endif help: @echo "Makefile for running tests on FLiT framework" From 1a90b6bf70949313f65e5c0262c1710014254b95 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 12:20:32 -0600 Subject: [PATCH 05/22] Fix tst_bisect_biggest from symbol type change --- tests/flit_cli/flit_bisect/tst_bisect_biggest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/flit_cli/flit_bisect/tst_bisect_biggest.py b/tests/flit_cli/flit_bisect/tst_bisect_biggest.py index 8745d6a8..63b9a009 100644 --- a/tests/flit_cli/flit_bisect/tst_bisect_biggest.py +++ b/tests/flit_cli/flit_bisect/tst_bisect_biggest.py @@ -100,7 +100,7 @@ >>> flit_bisect = th._path_import(th._script_dir, 'flit_bisect') >>> util = th._path_import(th._script_dir, 'flitutil') ->>> Sym = flit_bisect.elf.SymbolTuple +>>> Sym = 
flit_bisect.BisectSymbolTuple >>> def create_symbol(fileno, funcno, lineno, isproblem): ... prob_str = '_PROBLEM' if isproblem else '' ... filename = 'tests/file{}.cpp'.format(fileno) From 7d64c325ceb2747f77e46cc19e53917f89bc7ff7 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 12:26:57 -0600 Subject: [PATCH 06/22] travis: print the version of binutil --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 03996658..4d189315 100644 --- a/.travis.yml +++ b/.travis.yml @@ -123,7 +123,7 @@ before_install: - pip3 install --user setuptools - pip3 install --user toml $extra_pip -script: make -j4 && make -j4 -C tests && make check +script: dpkg --list | grep binutil && make -j4 && make -j4 -C tests && make check #notifications: # email: false From 01d7d8e9a616886c01458936a6f43fe6b4806276 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 12:33:12 -0600 Subject: [PATCH 07/22] travis: move distribution to Ubuntu Bionic 18.04 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 4d189315..5617116f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,6 +82,7 @@ language: cpp os: linux +dist: bionic addons: apt: packages: &native_deps From dcf3eee7df1a96136ab91b399142a1b3c2ebe35a Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 12:44:47 -0600 Subject: [PATCH 08/22] Install flitelf_nm.py and update install test --- Makefile | 1 + tests/flit_install/tst_install_runthrough.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Makefile b/Makefile index 2a421aa2..4f887eca 100644 --- a/Makefile +++ b/Makefile @@ -93,6 +93,7 @@ install: install -m 0644 $(SCRIPT_DIR)/experimental/ninja_syntax.py $(INST_SHAREDIR)/scripts/experimental/ install -m 0644 $(SCRIPT_DIR)/flitutil.py $(INST_SHAREDIR)/scripts/ install -m 0644 $(SCRIPT_DIR)/flitelf.py $(INST_SHAREDIR)/scripts/ + install -m 0644 $(SCRIPT_DIR)/flitelf_nm.py 
$(INST_SHAREDIR)/scripts/ install -m 0644 $(SCRIPT_DIR)/README.md $(INST_SHAREDIR)/scripts/ @$(call color_out,BROWN, Intalling bash-completion script into $(INST_BASH_COMPLETE_DIR)) @$(call color_out,GREEN, You can source it in your ~/.bashrc or copy it to /etc/bash_completion.d/) diff --git a/tests/flit_install/tst_install_runthrough.py b/tests/flit_install/tst_install_runthrough.py index c03970a4..1e96098e 100644 --- a/tests/flit_install/tst_install_runthrough.py +++ b/tests/flit_install/tst_install_runthrough.py @@ -294,6 +294,7 @@ 'share/flit/scripts/experimental/ninja_syntax.py', 'share/flit/scripts/flit.py', 'share/flit/scripts/flit_bisect.py', + 'share/flit/scripts/flit_disguise.py', 'share/flit/scripts/flit_experimental.py', 'share/flit/scripts/flit_import.py', 'share/flit/scripts/flit_init.py', @@ -301,6 +302,7 @@ 'share/flit/scripts/flit_update.py', 'share/flit/scripts/flitconfig.py', 'share/flit/scripts/flitelf.py', + 'share/flit/scripts/flitelf_nm.py', 'share/flit/scripts/flitutil.py', 'share/flit/src', 'share/flit/src/ALL-FLIT.cpp', From a9206a721a5256ab0c8dd0235f16d301997c0fb6 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 13:46:08 -0600 Subject: [PATCH 09/22] flitelf_nm: use addr2line to get line info --- scripts/flitcli/flitelf_nm.py | 75 +++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/scripts/flitcli/flitelf_nm.py b/scripts/flitcli/flitelf_nm.py index 5f085a67..5463bd3d 100644 --- a/scripts/flitcli/flitelf_nm.py +++ b/scripts/flitcli/flitelf_nm.py @@ -90,7 +90,7 @@ are "nm" and "c++filt" to perform the same functionality. 
''' -from collections import namedtuple +from collections import namedtuple, defaultdict import subprocess as subp import os import shutil @@ -127,7 +127,6 @@ def extract_symbols(objfile_or_list): '--print-file-name', '--extern-only', '--defined-only', - '--line-numbers', ] if isinstance(objfile_or_list, str): nm_args.append(objfile_or_list) @@ -135,34 +134,32 @@ def extract_symbols(objfile_or_list): nm_args.extend(objfile_or_list) symbol_strings = subp.check_output(nm_args).decode('utf-8').splitlines() + obj_symbols = defaultdict(list) symbols = [] - filenames = [] - linenumbers = [] for symbol_string in symbol_strings: - try: - stype, symbol_plus_extra = symbol_string.split(maxsplit=2)[1:] - except: - import pdb; pdb.set_trace() - if stype.lower() == 'w': - symbol = symbol_plus_extra.split()[0] - filename = None - linenumber = None - elif '\t' in symbol_plus_extra: - symbol, definition = symbol_plus_extra.split('\t', maxsplit=1) - filename, linenumber= definition.split(':') - linenumber = int(linenumber) - else: - symbol = symbol_plus_extra - filename = None - linenumber = None + loc, stype, symbol = symbol_string.split(maxsplit=2) + objfile, offset = loc.split(':') symbols.append(symbol) - filenames.append(filename) - linenumbers.append(linenumber) + obj_symbols[objfile].append((offset, stype, symbol)) + + demangle_map = dict(zip(symbols, _demangle(symbols))) - demangled = _demangle(symbols) + fileinfo_map = {} + linenumber_map = {} + for obj, symlist in obj_symbols.items(): + to_check = [] + for offset, stype, symbol in symlist: + if symbol in fileinfo_map and fileinfo_map[symbol]: + continue + elif stype.lower() != 't': + fileinfo_map[symbol] = (None, None) + else: + to_check.append((offset, symbol)) + fileinfo_map.update(_fnames_and_line_numbers(obj, to_check)) - for sym, dem, fnam, line in zip(symbols, demangled, filenames, linenumbers): - symbol_tuple = SymbolTuple(sym, dem, fnam, line) + for symbol in symbols: + fnam, line = fileinfo_map[symbol] + 
symbol_tuple = SymbolTuple(symbol, demangle_map[symbol], fnam, line) if fnam: funcsym_tuples.append(symbol_tuple) else: @@ -172,8 +169,36 @@ def extract_symbols(objfile_or_list): def _demangle(symbol_list): 'Demangles each C++ name in the given list' + if not symbol_list: + return [] proc = subp.Popen(['c++filt'], stdin=subp.PIPE, stdout=subp.PIPE) out, _ = proc.communicate('\n'.join(symbol_list).encode()) demangled = out.decode('utf8').splitlines() assert len(demangled) == len(symbol_list) return demangled + +def _fnames_and_line_numbers(objfile, offset_symbol_tuples): + ''' + Given a list of tuples of (offset, symbol), return a single dictionaries, a + mapping from symbol name to a tuple of (filename, line number). If the + filename and/or line number could not be determined, then both will be set + to None. + ''' + if not offset_symbol_tuples: + return {} + proc = subp.Popen(['addr2line', '-e', objfile], stdin=subp.PIPE, + stdout=subp.PIPE) + out, _ = proc.communicate('\n'.join(x[0] for x in offset_symbol_tuples) + .encode()) + info = out.decode('utf8').splitlines() + assert len(info) == len(offset_symbol_tuples), \ + 'len(info) = {}, len(offset_symbol_tuples) = {}'\ + .format(len(info), len(offset_symbol_tuples)) + mapping = {} + for line, symbol in zip(info, (x[1] for x in offset_symbol_tuples)): + filename, linenumber = line.strip().split(':') + if filename == '??' 
or linenumber == '0': + filename = None + linenumber = None + mapping[symbol] = (filename, linenumber) + return mapping From e27e984623ebb86e479371c56a21f1b612867797 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 13:55:58 -0600 Subject: [PATCH 10/22] travis: go back to default image --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5617116f..3e36ffd3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,7 +82,7 @@ language: cpp os: linux -dist: bionic +#dist: bionic addons: apt: packages: &native_deps From be2e14246646f86d8cdc40c8ef35cecfb57ad224 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 13:59:45 -0600 Subject: [PATCH 11/22] travis: install bash-completion --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 3e36ffd3..9738731c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -88,6 +88,7 @@ addons: packages: &native_deps - python3 - python3-pip + - bash-completion matrix: include: From 98f6317c8a5e5096f656dfb4c46c3ffe681e046d Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Mon, 27 Jul 2020 22:21:28 -0600 Subject: [PATCH 12/22] flit_disguise: add cli parsing --- scripts/bash-completion/flit | 57 ++++++++- scripts/flitcli/flit_disguise.py | 204 +++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 scripts/flitcli/flit_disguise.py diff --git a/scripts/bash-completion/flit b/scripts/bash-completion/flit index b636d4b3..19d6abff 100644 --- a/scripts/bash-completion/flit +++ b/scripts/bash-completion/flit @@ -23,7 +23,8 @@ _flit__sqlite_files() _flit_help() { local cur available_subcommands - available_subcommands="-h --help bisect experimental init make update import" + available_subcommands="-h --help + bisect experimental disguise init make update import" cur="${COMP_WORDS[COMP_CWORD]}" COMPREPLY=( $(compgen -W "${available_subcommands}" -- ${cur}) ) } @@ -93,6 
+94,57 @@ _flit_bisect() return 0 } +_flit_disguise() +{ + local cur prev opts + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + opts="-h --help + -g --generate + -o --output + -m --disguise-map + -u --undo + --fields" + # file field + + case "${prev}" in + + -m|--disguise-map|-o|--output) + _filedir # match with a file + return 0 + ;; + + --fields) + # three possible fields: file, function, test + # TODO: separate on comma and only complete since the last comma + local possibilities=" + file + file,function + file,function,test + file,test + file,test,function + function + function,file + function,file,test + function,test + function,test,file + test + test,file + test,file,function + test,function + test,function,file" + COMPREPLY=( $(compgen -W "${possibilities}" -- "${cur}") ) + return 0 + ;; + + esac + + COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") ) + _filedir # positional argument, match on files + return 0 +} + _flit_init() { local cur prev opts @@ -258,7 +310,7 @@ _flit() available_subcommands=" -h --help -v --version - experimental help bisect init make update import" + experimental help bisect disguise init make update import" # subcommand completion if [ ${COMP_CWORD} -le 1 ]; then @@ -270,6 +322,7 @@ _flit() case "${subcommand}" in help) _flit_help ;; bisect) _flit_bisect ;; + disguise) _flit_disguise ;; init) _flit_init ;; make) _flit_make ;; update) _flit_update ;; diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py new file mode 100644 index 00000000..ae257def --- /dev/null +++ b/scripts/flitcli/flit_disguise.py @@ -0,0 +1,204 @@ +# -- LICENSE BEGIN -- +# +# Copyright (c) 2015-2020, Lawrence Livermore National Security, LLC. 
+# +# Produced at the Lawrence Livermore National Laboratory +# +# Written by +# Michael Bentley (mikebentley15@gmail.com), +# Geof Sawaya (fredricflinstone@gmail.com), +# and Ian Briggs (ian.briggs@utah.edu) +# under the direction of +# Ganesh Gopalakrishnan +# and Dong H. Ahn. +# +# LLNL-CODE-743137 +# +# All rights reserved. +# +# This file is part of FLiT. For details, see +# https://pruners.github.io/flit +# Please also read +# https://github.com/PRUNERS/FLiT/blob/master/LICENSE +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the disclaimer below. +# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the disclaimer +# (as noted below) in the documentation and/or other materials +# provided with the distribution. +# +# - Neither the name of the LLNS/LLNL nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL +# SECURITY, LLC, THE U.S. 
DEPARTMENT OF ENERGY OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# +# Additional BSD Notice +# +# 1. This notice is required to be provided under our contract +# with the U.S. Department of Energy (DOE). This work was +# produced at Lawrence Livermore National Laboratory under +# Contract No. DE-AC52-07NA27344 with the DOE. +# +# 2. Neither the United States Government nor Lawrence Livermore +# National Security, LLC nor any of their employees, makes any +# warranty, express or implied, or assumes any liability or +# responsibility for the accuracy, completeness, or usefulness of +# any information, apparatus, product, or process disclosed, or +# represents that its use would not infringe privately-owned +# rights. +# +# 3. Also, reference herein to any specific commercial products, +# process, or services by trade name, trademark, manufacturer or +# otherwise does not necessarily constitute or imply its +# endorsement, recommendation, or favoring by the United States +# Government or Lawrence Livermore National Security, LLC. The +# views and opinions of authors expressed herein do not +# necessarily state or reflect those of the United States +# Government or Lawrence Livermore National Security, LLC, and +# shall not be used for advertising or product endorsement +# purposes. 
+# +# -- LICENSE END -- + +'Implements the make subcommand' + +import argparse +import glob +import multiprocessing +import subprocess +import sys + +import flit_import + +brief_description = 'Anonymizes project-specific data from text files' + +def populate_parser(parser=None): + 'Populate or create an ArgumentParser' + if parser is None: + parser = argparse.ArgumentParser() + parser.description = ''' + This command disguises (a.k.a., anonymizes) text or log files. Fields + that can be disguised are source file names, test names, and function names. + + To accomplish this feat, a mapping csv file will either be provided by + the user (with --disguise-map) or will be autogenerated as + "disguise.csv" (default behavior) from the contents of the Makefile and + the object files for the baseline compilation. The mapping is applied + as a very simple search and replace. + + To undo the disguise, use the --undo flag either allowing the default + "disguise.csv" file to be used or specifying one with --disguise-map. + ''' + parser.add_argument('-g', '--generate', action='store_true', + help=''' + Just generate the disguise map as "disguise.csv" + and then exit. It will be overwritten if it + already exists. Most users will not need to use + this flag. + ''') + parser.add_argument('-o', '--output', + help=''' + Output the disguised version of the input file to + this specified file. The default behavior is to + output to standard output. + ''') + parser.add_argument('-m', '--disguise-map', default='disguise.csv', + help=''' + Specify a specific CSV file to use as the disguise + map. The CSV file is expected to have a header row + with the column names "value" and "disguise". Both + columns should have unique values, i.e., a + one-to-one mapping. The default is "disguise.csv" + which is autogenerated if it is not there. + ''') + parser.add_argument('-u', '--undo', action='store_true', + help=''' + Undo the disguising. 
This will use the disguise + map to do search and replace in reverse. For + example, this can be used to de-anonymize the + analysis done by someone with the anonymized + file(s). + ''') + parser.add_argument('--fields', default='file,function,test', + help=''' + A comma-separated list of fields you want to + disguise. This will not effect the generated + disguise map, all fields will be present there. It + will just impact the disguising and the undo + operations. + + Available fields are + (1) "file": source file name and file path + (including compiled object file(s)), + (2) "function": function name, both mangled and + demangled, and + (3) "test": name of the test. + ''') + parser.add_argument('file', + help=''' + Text file to disguise. Disguising is done with + simple search and replace. The "value" column of + the disguise map CSV file is searched for and + replaced with the "value" field. If --undo is + specified, then it is done in reverse. + ''') + return parser + +def main(arguments, prog=None): + 'Main logic here' + parser = populate_parser() + if prog: parser.prog = prog + args = parser.parse_args(arguments) + + ###check_call_kwargs = dict() + ###if args.quiet: + ### check_call_kwargs['stdout'] = subprocess.DEVNULL + ### #check_call_kwargs['stderr'] = subprocess.DEVNULL + ###make_args = [] + ###if args.make_args is not None: + ### make_args = args.make_args.split(',') + + #### TODO: can we make a progress bar here? 
+ ###print('Calling GNU Make for the runbuild') + ###subprocess.check_call([ + ### 'make', + ### 'runbuild', + ### '-j{0}'.format(args.jobs), + ### ] + make_args, **check_call_kwargs) + ###print('Calling GNU Make to execute the tests') + ###subprocess.check_call([ + ### 'make', + ### 'run', + ### '-j{0}'.format(args.exec_jobs), + ### ] + make_args, **check_call_kwargs) + ###print('Importing into the database') + #### TODO: find a way to not import over again if called multiple times + ###status = flit_import.main(['--label', args.label] + + ### glob.glob('results/*_out.csv')) + ###if status != 0: + ### return status + + return 0 + +if __name__ == '__main__': + sys.exit(main(sys.argv[1:])) From 0f9b0764764a2594a579d6af748d74ae0ffebf7e Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 29 Jul 2020 13:41:47 -0600 Subject: [PATCH 13/22] flit_disguise: add some failing tests --- scripts/flitcli/flit_disguise.py | 5 +- tests/flit_cli/flit_disguise/Makefile | 27 +++ .../flit_disguise/tst_flit_disguise.py | 171 ++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 tests/flit_cli/flit_disguise/Makefile create mode 100755 tests/flit_cli/flit_disguise/tst_flit_disguise.py diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index ae257def..866707df 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -154,13 +154,16 @@ def populate_parser(parser=None): demangled, and (3) "test": name of the test. ''') - parser.add_argument('file', + parser.add_argument('file', nargs='?', help=''' Text file to disguise. Disguising is done with simple search and replace. The "value" column of the disguise map CSV file is searched for and replaced with the "value" field. If --undo is specified, then it is done in reverse. + + If the file is not specified, then it is read from + standard in. 
''') return parser diff --git a/tests/flit_cli/flit_disguise/Makefile b/tests/flit_cli/flit_disguise/Makefile new file mode 100644 index 00000000..4ce0ee57 --- /dev/null +++ b/tests/flit_cli/flit_disguise/Makefile @@ -0,0 +1,27 @@ +RUNNER := python3 +SRC := $(wildcard tst_*.py) +RUN_TARGETS := $(SRC:%.py=run_%) + +include ../../color_out.mk + +ifndef VERBOSE +.SILENT: +endif + +.PHONY: check help clean build run_% +check: $(TARGETS) $(RUN_TARGETS) + +help: + @echo "Makefile for running tests on FLiT framework" + @echo " help print this help documentation and exit" + @echo " build just compile the targets" + @echo " check run tests and print results to the console" + @echo " clean remove all generated files" + +build: +clean: + +run_% : %.py + @$(call color_out_noline,BROWN, running) + @echo " $<" + $(RUNNER) $< diff --git a/tests/flit_cli/flit_disguise/tst_flit_disguise.py b/tests/flit_cli/flit_disguise/tst_flit_disguise.py new file mode 100755 index 00000000..532836a6 --- /dev/null +++ b/tests/flit_cli/flit_disguise/tst_flit_disguise.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# -- LICENSE BEGIN -- +# +# Copyright (c) 2015-2020, Lawrence Livermore National Security, LLC. +# +# Produced at the Lawrence Livermore National Laboratory +# +# Written by +# Michael Bentley (mikebentley15@gmail.com), +# Geof Sawaya (fredricflinstone@gmail.com), +# and Ian Briggs (ian.briggs@utah.edu) +# under the direction of +# Ganesh Gopalakrishnan +# and Dong H. Ahn. +# +# LLNL-CODE-743137 +# +# All rights reserved. +# +# This file is part of FLiT. For details, see +# https://pruners.github.io/flit +# Please also read +# https://github.com/PRUNERS/FLiT/blob/master/LICENSE +# +# Redistribution and use in source and binary forms, with or +# without modification, are permitted provided that the following +# conditions are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the disclaimer below. 
+# +# - Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the disclaimer +# (as noted below) in the documentation and/or other materials +# provided with the distribution. +# +# - Neither the name of the LLNS/LLNL nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL +# SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# +# Additional BSD Notice +# +# 1. This notice is required to be provided under our contract +# with the U.S. Department of Energy (DOE). This work was +# produced at Lawrence Livermore National Laboratory under +# Contract No. DE-AC52-07NA27344 with the DOE. +# +# 2. Neither the United States Government nor Lawrence Livermore +# National Security, LLC nor any of their employees, makes any +# warranty, express or implied, or assumes any liability or +# responsibility for the accuracy, completeness, or usefulness of +# any information, apparatus, product, or process disclosed, or +# represents that its use would not infringe privately-owned +# rights. +# +# 3. 
Also, reference herein to any specific commercial products, +# process, or services by trade name, trademark, manufacturer or +# otherwise does not necessarily constitute or imply its +# endorsement, recommendation, or favoring by the United States +# Government or Lawrence Livermore National Security, LLC. The +# views and opinions of authors expressed herein do not +# necessarily state or reflect those of the United States +# Government or Lawrence Livermore National Security, LLC, and +# shall not be used for advertising or product endorsement +# purposes. +# +# -- LICENSE END -- + +''' +Tests FLiT's disguise subcommand as integration tests +''' + +import unittest as ut +import tempfile +from io import StringIO + +import sys +before_path = sys.path[:] +sys.path.append('../..') +import test_harness as th +sys.path = before_path + +NamedTempFile = lambda: tempfile.NamedTemporaryFile(mode='wt', buffering=1) + +class FlitTestBase(ut.TestCase): + + def capture_flit(self, args): + ''' + Runs the flit command-line tool with the given args. Returns the + standard output from the flit run as a list of lines. 
+ ''' + print(args) + with StringIO() as ostream: + retval = th.flit.main(args, outstream=ostream) + lines = ostream.getvalue().splitlines() + self.assertEqual(retval, 0) + return lines + + def run_flit(self, args): + 'Runs flit ignoring standard output' + self.capture_flit(args) + +class FlitDisguiseTest(FlitTestBase): + + def setup_flitdir(self, directory): + self.run_flit(['init', '--directory', directory]) + + def disguise_string(self, content, fields=None, mapping=None, undo=False): + 'Runs flit disguise on the content and returns the disguised version' + with NamedTempFile() as fcontent: + args = ['disguise', fcontent.name] + + if fields is not None: + args.extend(['--fields', ','.join(fields)]) + + if undo: + args.append('--undo') + + if mapping is not None: + with NamedTempFile() as fout: + fout.write('disguise,value\n') + fout.file.writelines(['"{}","{}"\n'.format(value, key) + for key, value in mapping.items()]) + args.extend(['--disguise-map', fout.name]) + return self.capture_flit(args) + + return self.capture_flit(args) + + def test_generate_map_default_flit_init(self): + with th.util.tempdir() as flitdir: + self.setup_flitdir(flitdir) + with th.util.pushd(flitdir): + output = self.capture_flit(['disguise', '--generate']) + self.assertEqual(output, ['Created disguise.csv']) + with open('disguise.csv') as disguise_in: + disguise_contents = disguise_in.readlines() + expected_disguise_contents = [ + 'disguise,value\n', + 'file-00001,ALL-FLIT.cpp\n', + 'file-00002,Empty.cpp\n', + 'file-00003,main.cpp\n', + 'path-00001,tests/Empty.cpp\n', + 'func-00001,main\n', + ] + self.assertEqual(disguise_contents, expected_disguise_contents) + + def test_disguise_empty_map(self): + to_disguise = ''' + Just some file contents. + Nothing to worry about here. 
+ ''' + disguised = self.disguise_string(to_disguise, mapping={}) + self.assertEqual(disguised, to_disguise.splitlines()) + +if __name__ == '__main__': + sys.exit(th.unittest_main()) From f38e5d848f9a21530e13ba1efbcec23dc5959037 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 29 Jul 2020 17:32:50 -0600 Subject: [PATCH 14/22] flit-disguise: add more tests --- .../flit_disguise/tst_flit_disguise.py | 64 +++++++++++++++++-- 1 file changed, 57 insertions(+), 7 deletions(-) diff --git a/tests/flit_cli/flit_disguise/tst_flit_disguise.py b/tests/flit_cli/flit_disguise/tst_flit_disguise.py index 532836a6..4d0bc753 100755 --- a/tests/flit_cli/flit_disguise/tst_flit_disguise.py +++ b/tests/flit_cli/flit_disguise/tst_flit_disguise.py @@ -104,7 +104,7 @@ def capture_flit(self, args): Runs the flit command-line tool with the given args. Returns the standard output from the flit run as a list of lines. ''' - print(args) + print('flit', args) with StringIO() as ostream: retval = th.flit.main(args, outstream=ostream) lines = ostream.getvalue().splitlines() @@ -160,12 +160,62 @@ def test_generate_map_default_flit_init(self): self.assertEqual(disguise_contents, expected_disguise_contents) def test_disguise_empty_map(self): - to_disguise = ''' - Just some file contents. - Nothing to worry about here. 
- ''' - disguised = self.disguise_string(to_disguise, mapping={}) - self.assertEqual(disguised, to_disguise.splitlines()) + to_disguise = [ + ' Just some file contents.', + 'Nothing to worry about here.', + ] + disguised = self.disguise_string('\n'.join(to_disguise), mapping={}) + self.assertEqual(disguised, to_disguise) + + def test_disguise_normal(self): + disguise_mapping = { + 'disguise-01': 'map', + 'disguise-02': 'hi', + 'disguise-03': 'function(string, int, int)', + 'disguise-04': 'not found', + } + to_disguise = [ + 'hi there chico', + 'may mapping map file is not so good', + 'has a function called function(string, int, int).', + ] + expected_disguised = [ + 'disguise-02 there chico', + 'may mapping disguise-01 file is not so good', + 'has a function called disguise-03.', + ] + disguised = self.disguise_string( + '\n'.join(to_disguise), mapping=disguise_mapping) + self.assertEqual(disguised, to_disguise) + + def test_disguise_normal_undo(self): + disguise_mapping = { + 'disguise-01': 'map', + 'disguise-02': 'hi', + 'disguise-03': 'function(string, int, int)', + 'disguise-04': 'not found', + } + disguised = [ + 'disguise-02 there chico', + 'may mapping disguise-01 file is not so good', + 'has a function called disguise-03.', + ] + expected_undisguised = [ + 'hi there chico', + 'may mapping map file is not so good', + 'has a function called function(string, int, int).', + ] + undisguised = self.disguise_string( + '\n'.join(disguised), mapping=disguise_mapping, undo=True) + self.assertEqual(undisguised, expected_undisguised) + + def test_disguise_bad_map_file(self): + with NamedTempFile() as mapfile: + mapfile.write('not the correct header\n' + 'does not matter\n' + 'what the rest has...\n') + with self.assertRaises(AssertionError): + self.run_flit(['disguise', '--disguise-map', mapfile.name]) if __name__ == '__main__': sys.exit(th.unittest_main()) From 9fc34c85d696fd8b5bdfe91b8185c370b28356b6 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Tue, 1 Sep 
2020 15:41:16 -0600 Subject: [PATCH 15/22] flit-disguise: read mapping --- scripts/flitcli/flit_disguise.py | 80 +++++++++++++++++++------------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index 866707df..a58c3710 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -80,16 +80,13 @@ # # -- LICENSE END -- -'Implements the make subcommand' +'Implements the disguise subcommand to anonymize project-specific data' import argparse -import glob -import multiprocessing -import subprocess +import csv +import subprocess as subp import sys -import flit_import - brief_description = 'Anonymizes project-specific data from text files' def populate_parser(parser=None): @@ -167,39 +164,56 @@ def populate_parser(parser=None): ''') return parser +def generate_disguise_map(): + 'Generate the disguise map, often called from the Makefile' + print('Created disguise.csv') + with open('disguise.csv', 'w') as fout: + fout.write('hello') + +def check_disguise_map_regenerate(): + 'check to see if disguise.csv needs regenerating and do it if so.' 
+ # TODO: implement + #subp.check_call(['make', 'disguise.csv']) + pass + +def read_disguise_map(fname): + 'Read and return the forward and reverse dictionary of the disguise map' + forward_map = {} + reverse_map = {} + with open(fname, 'r') as fin: + reader = csv.DictReader(fin) + assert 'disguise' in reader.fieldnames + assert 'value' in reader.fieldnames + for entry in reader: + disguise, value = entry['disguise'], entry['value'] + assert disguise not in forward_map + assert value not in reverse_map + foward_map[disguise] = value + reverse_map[value] = disguise + return forward_map, reverse_map + def main(arguments, prog=None): 'Main logic here' parser = populate_parser() if prog: parser.prog = prog args = parser.parse_args(arguments) - ###check_call_kwargs = dict() - ###if args.quiet: - ### check_call_kwargs['stdout'] = subprocess.DEVNULL - ### #check_call_kwargs['stderr'] = subprocess.DEVNULL - ###make_args = [] - ###if args.make_args is not None: - ### make_args = args.make_args.split(',') - - #### TODO: can we make a progress bar here? 
- ###print('Calling GNU Make for the runbuild') - ###subprocess.check_call([ - ### 'make', - ### 'runbuild', - ### '-j{0}'.format(args.jobs), - ### ] + make_args, **check_call_kwargs) - ###print('Calling GNU Make to execute the tests') - ###subprocess.check_call([ - ### 'make', - ### 'run', - ### '-j{0}'.format(args.exec_jobs), - ### ] + make_args, **check_call_kwargs) - ###print('Importing into the database') - #### TODO: find a way to not import over again if called multiple times - ###status = flit_import.main(['--label', args.label] + - ### glob.glob('results/*_out.csv')) - ###if status != 0: - ### return status + # TODO: implement + if args.generate: + generate_disguise_map() + return 0 + + # TODO: implement + if args.disguise_map == 'disguise.csv': + check_disguise_map_regenerate() + + forward_map, reverse_map = read_disguise_map(args.disguise_map) + + # choose the output stream + if args.output: + out = open(args.output, 'w') + else: + out = sys.stdout return 0 From 26548255d5e1fc21199d298f244e14b916d4bc0e Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Tue, 1 Sep 2020 18:03:02 -0600 Subject: [PATCH 16/22] flit_disguise: implement disguise from given map --- scripts/flitcli/flit_disguise.py | 38 ++++++++++++++++--- .../flit_disguise/tst_flit_disguise.py | 8 +++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index a58c3710..a54a3671 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -84,6 +84,7 @@ import argparse import csv +import re import subprocess as subp import sys @@ -186,10 +187,10 @@ def read_disguise_map(fname): assert 'value' in reader.fieldnames for entry in reader: disguise, value = entry['disguise'], entry['value'] - assert disguise not in forward_map - assert value not in reverse_map - foward_map[disguise] = value - reverse_map[value] = disguise + assert value not in forward_map + assert disguise not in reverse_map + 
forward_map[value] = disguise + reverse_map[disguise] = value return forward_map, reverse_map def main(arguments, prog=None): @@ -208,12 +209,37 @@ def main(arguments, prog=None): check_disguise_map_regenerate() forward_map, reverse_map = read_disguise_map(args.disguise_map) + mapping_to_use = reverse_map if args.undo else forward_map + + # choose the input stream + if args.file: + fin = open(args.file, 'r') + else: + fin = sys.stdin # choose the output stream if args.output: - out = open(args.output, 'w') + fout = open(args.output, 'w') else: - out = sys.stdout + fout = sys.stdout + + # like "grep -w" with a replace + for line in fin: + for key, val in mapping_to_use.items(): + pattern = re.escape(key) + if key[0].isalpha(): + pattern = r'\b' + pattern + if key[-1].isalpha(): + pattern = pattern + r'\b' + if re.search(pattern, line): + fout.write(re.sub(pattern, val, line)) + break + else: + fout.write(line) + + fout.flush() + if args.file: fin.close() + if args.output: fout.close() return 0 diff --git a/tests/flit_cli/flit_disguise/tst_flit_disguise.py b/tests/flit_cli/flit_disguise/tst_flit_disguise.py index 4d0bc753..46e9cf18 100755 --- a/tests/flit_cli/flit_disguise/tst_flit_disguise.py +++ b/tests/flit_cli/flit_disguise/tst_flit_disguise.py @@ -123,6 +123,8 @@ def setup_flitdir(self, directory): def disguise_string(self, content, fields=None, mapping=None, undo=False): 'Runs flit disguise on the content and returns the disguised version' with NamedTempFile() as fcontent: + fcontent.write(content) + fcontent.flush() args = ['disguise', fcontent.name] if fields is not None: @@ -134,8 +136,9 @@ def disguise_string(self, content, fields=None, mapping=None, undo=False): if mapping is not None: with NamedTempFile() as fout: fout.write('disguise,value\n') - fout.file.writelines(['"{}","{}"\n'.format(value, key) + fout.file.writelines(['"{}","{}"\n'.format(key, value) for key, value in mapping.items()]) + fout.flush() args.extend(['--disguise-map', fout.name]) return 
self.capture_flit(args) @@ -186,7 +189,7 @@ def test_disguise_normal(self): ] disguised = self.disguise_string( '\n'.join(to_disguise), mapping=disguise_mapping) - self.assertEqual(disguised, to_disguise) + self.assertEqual(disguised, expected_disguised) def test_disguise_normal_undo(self): disguise_mapping = { @@ -205,6 +208,7 @@ def test_disguise_normal_undo(self): 'may mapping map file is not so good', 'has a function called function(string, int, int).', ] + undisguised = self.disguise_string( '\n'.join(disguised), mapping=disguise_mapping, undo=True) self.assertEqual(undisguised, expected_undisguised) From 9459309e37fa1a835c77ae25c8537d8f27d539e1 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Tue, 22 Sep 2020 14:19:52 -0600 Subject: [PATCH 17/22] flit_disguise: implement map generation I believe this means flit_disguise is fully implemented --- scripts/flitcli/flit_disguise.py | 102 ++++++++++++++++-- .../flit_disguise/tst_flit_disguise.py | 12 ++- 2 files changed, 100 insertions(+), 14 deletions(-) diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index a54a3671..4e766aa5 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -82,8 +82,12 @@ 'Implements the disguise subcommand to anonymize project-specific data' +import flitutil as util +import flit_bisect as bisect + import argparse import csv +import os import re import subprocess as subp import sys @@ -112,7 +116,9 @@ def populate_parser(parser=None): Just generate the disguise map as "disguise.csv" and then exit. It will be overwritten if it already exists. Most users will not need to use - this flag. + this flag. If you use this flag with the + --disguise-map flag, then it will output the map to + the specified disguise map. 
''') parser.add_argument('-o', '--output', help=''' @@ -165,11 +171,91 @@ def populate_parser(parser=None): ''') return parser -def generate_disguise_map(): +def generate_disguise_map(outfile='disguise.csv'): 'Generate the disguise map, often called from the Makefile' - print('Created disguise.csv') - with open('disguise.csv', 'w') as fout: - fout.write('hello') + + # make sure gtrun is compiled + subp.check_call(['make', 'gtrun']) + makevars = util.extract_make_vars() + + # get list of source files + sources = sorted(makevars['SOURCE']) + + # get list of object files + objdir = makevars['GT_OBJ_DIR'][0] + objects = sorted([os.path.basename(source) + '.o' for source in sources]) + + # get list of function symbols and demangled signatures + symbol_objects, _ = bisect.extract_symbols(sources, objdir) + symbols = [sym.symbol for sym in symbol_objects] + demangled = [sym.demangled for sym in symbol_objects] + + # get list of tests + tests = subp.check_output(['./gtrun', '--list-tests']).decode('utf-8').splitlines() + + seen_values = set() + + # write mapping to file + with open(outfile, 'w') as fout: + writer = csv.DictWriter(fout, ['disguise', 'value']) + writer.writeheader() + + def writerows(disguise_base, values): + 'Only write rows that have a unique value' + unique_values = [val for val in values if val not in seen_values] + seen_values.update(unique_values) + writer.writerows(gen_disguise_list(disguise_base, unique_values)) + + writerows('objfile', objects) + writerows('filepath', sources) + writerows('filename', [os.path.basename(x) for x in sources]) + writerows('symbol', symbols) + writerows('demangled', demangled) + writerows('test', tests) + + print('Created {}'.format(outfile)) + +def gen_disguise_list(disguise_base, values): + ''' + Generates a list of dictionaries for insertion into a disguise map. + Will add an integer to the disguise base, zero padded based on the number + of values in the given list of values. 
+ + @param disguise_base (str): basename of the disguise value + @param values (list(str)): values to be disguised in this order + + >>> gen_disguise_list('ababab', []) + [] + + >>> expected = [ + ... {'disguise': 'happy-1', 'value': 'me'}, + ... {'disguise': 'happy-2', 'value': 'myself'}, + ... {'disguise': 'happy-3', 'value': 'I'}, + ... ] + >>> expected == gen_disguise_list('happy', ['me', 'myself', 'I']) + True + + >>> expected = [ + ... {'disguise': 'sad-01', 'value': '0'}, + ... {'disguise': 'sad-02', 'value': '1'}, + ... {'disguise': 'sad-03', 'value': '2'}, + ... {'disguise': 'sad-04', 'value': '3'}, + ... {'disguise': 'sad-05', 'value': '4'}, + ... {'disguise': 'sad-06', 'value': '5'}, + ... {'disguise': 'sad-07', 'value': '6'}, + ... {'disguise': 'sad-08', 'value': '7'}, + ... {'disguise': 'sad-09', 'value': '8'}, + ... {'disguise': 'sad-10', 'value': '9'}, + ... {'disguise': 'sad-11', 'value': '10'}, + ... ] + >>> expected == gen_disguise_list('sad', [str(i) for i in range(11)]) + True + ''' + ndigits = len(str(len(values))) + format_str = '{}-{{i:0{}d}}'.format(disguise_base, ndigits) + disguises = [{'disguise': format_str.format(i=i+1), 'value': val} + for i, val in enumerate(values)] + return disguises def check_disguise_map_regenerate(): 'check to see if disguise.csv needs regenerating and do it if so.' 
@@ -199,14 +285,12 @@ def main(arguments, prog=None): if prog: parser.prog = prog args = parser.parse_args(arguments) - # TODO: implement if args.generate: - generate_disguise_map() + generate_disguise_map(args.disguise_map) return 0 - # TODO: implement if args.disguise_map == 'disguise.csv': - check_disguise_map_regenerate() + generate_disguise_map(args.disguise_map) forward_map, reverse_map = read_disguise_map(args.disguise_map) mapping_to_use = reverse_map if args.undo else forward_map diff --git a/tests/flit_cli/flit_disguise/tst_flit_disguise.py b/tests/flit_cli/flit_disguise/tst_flit_disguise.py index 46e9cf18..40429cbe 100755 --- a/tests/flit_cli/flit_disguise/tst_flit_disguise.py +++ b/tests/flit_cli/flit_disguise/tst_flit_disguise.py @@ -154,11 +154,13 @@ def test_generate_map_default_flit_init(self): disguise_contents = disguise_in.readlines() expected_disguise_contents = [ 'disguise,value\n', - 'file-00001,ALL-FLIT.cpp\n', - 'file-00002,Empty.cpp\n', - 'file-00003,main.cpp\n', - 'path-00001,tests/Empty.cpp\n', - 'func-00001,main\n', + 'objfile-1,Empty.cpp.o\n', + 'objfile-2,main.cpp.o\n', + 'filepath-1,main.cpp\n', + 'filepath-2,tests/Empty.cpp\n', + 'filename-1,Empty.cpp\n', + 'symbol-1,main\n', + 'test-1,Empty\n', ] self.assertEqual(disguise_contents, expected_disguise_contents) From 1064a41de0d6afbd132922b6c858aa1d2e61685a Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Wed, 23 Sep 2020 11:34:09 -0600 Subject: [PATCH 18/22] flit_disguise: use flitelf_nm directly and fix tests --- scripts/flitcli/flit_disguise.py | 24 ++++++++++-------- .../flit_disguise/tst_flit_disguise.py | 25 ++++++++++--------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index 4e766aa5..333de9bb 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -87,10 +87,15 @@ import argparse import csv +import glob import os import re import subprocess as subp import 
sys +try: + import flitelf_nm as elf +except ImportError: + elf = None brief_description = 'Anonymizes project-specific data from text files' @@ -186,9 +191,12 @@ def generate_disguise_map(outfile='disguise.csv'): objects = sorted([os.path.basename(source) + '.o' for source in sources]) # get list of function symbols and demangled signatures - symbol_objects, _ = bisect.extract_symbols(sources, objdir) - symbols = [sym.symbol for sym in symbol_objects] - demangled = [sym.demangled for sym in symbol_objects] + #symbol_objects, _ = bisect.extract_symbols(sources, objdir) # too slow + #symbol_objects, _ = elf.extract_symbols('gtrun') # too few symbols + symbol_objects, _ = elf.extract_symbols([ + os.path.join(objdir, obj) for obj in objects]) + symbols = sorted(sym.symbol for sym in symbol_objects) + demangled = sorted(sym.demangled for sym in symbol_objects) # get list of tests tests = subp.check_output(['./gtrun', '--list-tests']).decode('utf-8').splitlines() @@ -208,10 +216,10 @@ def writerows(disguise_base, values): writerows('objfile', objects) writerows('filepath', sources) - writerows('filename', [os.path.basename(x) for x in sources]) + writerows('filename', sorted(os.path.basename(x) for x in sources)) writerows('symbol', symbols) writerows('demangled', demangled) - writerows('test', tests) + writerows('test', sorted(tests)) print('Created {}'.format(outfile)) @@ -257,12 +265,6 @@ def gen_disguise_list(disguise_base, values): for i, val in enumerate(values)] return disguises -def check_disguise_map_regenerate(): - 'check to see if disguise.csv needs regenerating and do it if so.' 
- # TODO: implement - #subp.check_call(['make', 'disguise.csv']) - pass - def read_disguise_map(fname): 'Read and return the forward and reverse dictionary of the disguise map' forward_map = {} diff --git a/tests/flit_cli/flit_disguise/tst_flit_disguise.py b/tests/flit_cli/flit_disguise/tst_flit_disguise.py index 40429cbe..a04855da 100755 --- a/tests/flit_cli/flit_disguise/tst_flit_disguise.py +++ b/tests/flit_cli/flit_disguise/tst_flit_disguise.py @@ -88,6 +88,7 @@ import unittest as ut import tempfile from io import StringIO +import re import sys before_path = sys.path[:] @@ -104,7 +105,6 @@ def capture_flit(self, args): Runs the flit command-line tool with the given args. Returns the standard output from the flit run as a list of lines. ''' - print('flit', args) with StringIO() as ostream: retval = th.flit.main(args, outstream=ostream) lines = ostream.getvalue().splitlines() @@ -152,17 +152,18 @@ def test_generate_map_default_flit_init(self): self.assertEqual(output, ['Created disguise.csv']) with open('disguise.csv') as disguise_in: disguise_contents = disguise_in.readlines() - expected_disguise_contents = [ - 'disguise,value\n', - 'objfile-1,Empty.cpp.o\n', - 'objfile-2,main.cpp.o\n', - 'filepath-1,main.cpp\n', - 'filepath-2,tests/Empty.cpp\n', - 'filename-1,Empty.cpp\n', - 'symbol-1,main\n', - 'test-1,Empty\n', - ] - self.assertEqual(disguise_contents, expected_disguise_contents) + self.assertEqual('disguise,value\n', disguise_contents[0]) + self.assertEqual('objfile-1,Empty.cpp.o\n', disguise_contents[1]) + self.assertEqual('objfile-2,main.cpp.o\n', disguise_contents[2]) + self.assertEqual('filepath-1,main.cpp\n', disguise_contents[3]) + self.assertEqual('filepath-2,tests/Empty.cpp\n', disguise_contents[4]) + self.assertEqual('filename-1,Empty.cpp\n', disguise_contents[5]) + self.assertEqual('test-1,Empty\n', disguise_contents[-1]) + expected_symbols = ['main'] + symbol_lines = [x for x in disguise_contents if x.startswith('symbol')] + for symbol in 
expected_symbols: + self.assertTrue(any(re.match('symbol-\d*,{}\n'.format(symbol), line) + for line in disguise_contents)) def test_disguise_empty_map(self): to_disguise = [ From 23390001bb8b60260163773cc9dbc3314f5276df Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Sat, 3 Oct 2020 16:30:41 -0600 Subject: [PATCH 19/22] Remove pyelftools completely --- .travis.yml | 4 - Makefile | 1 - documentation/flit-command-line.md | 8 +- documentation/installation.md | 9 +- scripts/flitcli/flit_bisect.py | 7 +- scripts/flitcli/flit_disguise.py | 4 +- scripts/flitcli/flitelf.py | 260 ++++++------------- scripts/flitcli/flitelf_nm.py | 204 --------------- tests/flit_install/tst_install_runthrough.py | 1 - 9 files changed, 91 insertions(+), 407 deletions(-) delete mode 100644 scripts/flitcli/flitelf_nm.py diff --git a/.travis.yml b/.travis.yml index 9738731c..394eb218 100644 --- a/.travis.yml +++ b/.travis.yml @@ -95,7 +95,6 @@ matrix: # Job 1: OpenMPI - env: - mpi_type=openmpi - - extra_pip=pyelftools addons: apt: packages: @@ -105,7 +104,6 @@ matrix: # Job 2: MPICH - env: - mpi_type=mpich - - extra_pip= addons: apt: packages: @@ -115,7 +113,6 @@ matrix: # Job 3: No MPI - env: - mpi_type=none - - extra_pip= addons: apt: packages: @@ -123,7 +120,6 @@ matrix: before_install: - pip3 install --user setuptools - - pip3 install --user toml $extra_pip script: dpkg --list | grep binutil && make -j4 && make -j4 -C tests && make check diff --git a/Makefile b/Makefile index 4f887eca..2a421aa2 100644 --- a/Makefile +++ b/Makefile @@ -93,7 +93,6 @@ install: install -m 0644 $(SCRIPT_DIR)/experimental/ninja_syntax.py $(INST_SHAREDIR)/scripts/experimental/ install -m 0644 $(SCRIPT_DIR)/flitutil.py $(INST_SHAREDIR)/scripts/ install -m 0644 $(SCRIPT_DIR)/flitelf.py $(INST_SHAREDIR)/scripts/ - install -m 0644 $(SCRIPT_DIR)/flitelf_nm.py $(INST_SHAREDIR)/scripts/ install -m 0644 $(SCRIPT_DIR)/README.md $(INST_SHAREDIR)/scripts/ @$(call color_out,BROWN, Intalling bash-completion script into 
$(INST_BASH_COMPLETE_DIR)) @$(call color_out,GREEN, You can source it in your ~/.bashrc or copy it to /etc/bash_completion.d/) diff --git a/documentation/flit-command-line.md b/documentation/flit-command-line.md index 0ab2a047..dbff00b4 100644 --- a/documentation/flit-command-line.md +++ b/documentation/flit-command-line.md @@ -162,11 +162,6 @@ flit import --dbfile temporary.sqlite backup/results/*.csv ## flit bisect -There is an additional optional dependency in order to run `flit bisect`. That -is [pyelftools](https://github.com/eliben/pyelftools) as discussed in [FLiT -Installation](installation.md). If `pyelftools` is not installed, then -`bisect` is disabled. - After FLiT identifies compilations that cause some tests to exhibit variability, one may want to investigate further and understand where the compiler introduced overly aggressive optimizations. @@ -177,6 +172,9 @@ blamed source files. You can run `flit bisect` directly giving it a specific compilation, precision, and test case, or you can tell it to automatically run for all differences in a given SQLite3 database. +FLiT Bisect depends on binutils to extract symbols, filenames, and line numbers +from compiled object files. + Here is an example of giving a single test case (named `subnormal`) known to show variability: diff --git a/documentation/installation.md b/documentation/installation.md index d26c1724..3ac3df75 100644 --- a/documentation/installation.md +++ b/documentation/installation.md @@ -31,9 +31,6 @@ Stuff you may need to get * [python3](https://www.python.org) * [toml](https://github.com/uiri/toml) module (for [TOML](https://github.com/toml-lang/toml) configuration files) - * (optional) [pyelftools](https://github.com/eliben/pyelftools) module for - parsing ELF files. This is used for `flit bisect`; all other functionality - will work without it. 
* [make](https://www.gnu.org/software/make) * A C++11 compatible compiler (see section [Compilers](#compilers) for supported versions) @@ -51,21 +48,21 @@ sudo apt install \ The python modules can be installed with `apt` ```bash -sudo apt install python3-toml python3-pyelftools +sudo apt install python3-toml ``` or with `pip` ```bash sudo apt install python3-pip -pip3 install --user toml pyelftools +pip3 install --user toml ``` For homebrew on OSX (besides installing [Xcode](https://developer.apple.com/xcode)) ```bash brew install make python3 gcc git -pip3 install toml pyelftools +pip3 install toml ``` If you install python version 3.0 or later, then you will need to have a diff --git a/scripts/flitcli/flit_bisect.py b/scripts/flitcli/flit_bisect.py index ffb944f1..1aaed6fc 100644 --- a/scripts/flitcli/flit_bisect.py +++ b/scripts/flitcli/flit_bisect.py @@ -107,10 +107,7 @@ try: import flitelf as elf except ImportError: - try: - import flitelf_nm as elf - except ImportError: - elf = None + elf = None brief_description = 'Bisect compilation to identify problematic source code' @@ -2225,7 +2222,7 @@ def main(arguments, prog=None): ''' if elf is None: - print('Error: pyelftools or binutils is not installed, bisect disabled', + print('Error: binutils is not installed, bisect disabled', file=sys.stderr) return 1 diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index 333de9bb..51faf08b 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -93,7 +93,7 @@ import subprocess as subp import sys try: - import flitelf_nm as elf + import flitelf as elf except ImportError: elf = None @@ -191,8 +191,6 @@ def generate_disguise_map(outfile='disguise.csv'): objects = sorted([os.path.basename(source) + '.o' for source in sources]) # get list of function symbols and demangled signatures - #symbol_objects, _ = bisect.extract_symbols(sources, objdir) # too slow - #symbol_objects, _ = elf.extract_symbols('gtrun') # too few 
symbols symbol_objects, _ = elf.extract_symbols([ os.path.join(objdir, obj) for obj in objects]) symbols = sorted(sym.symbol for sym in symbol_objects) diff --git a/scripts/flitcli/flitelf.py b/scripts/flitcli/flitelf.py index dddac5eb..88a69587 100644 --- a/scripts/flitcli/flitelf.py +++ b/scripts/flitcli/flitelf.py @@ -1,6 +1,3 @@ -# Much of this is copied from the examples given in -# https://github.com/eliben/pyelftools.git - # -- LICENSE BEGIN -- # # Copyright (c) 2015-2020, Lawrence Livermore National Security, LLC. @@ -84,16 +81,21 @@ # -- LICENSE END -- ''' -Utility functions for dealing with ELF binary files. This file requires the -pyelftools package to be installed (i.e. module elftools). +Utility functions for dealing with ELF binary files. This file uses +alternative methods to do this functionality that does not require the +pyelftools package. + +Instead, this package uses binutils through subprocesses. The programs used +are "nm" and "c++filt" to perform the same functionality. ''' -from collections import namedtuple +from collections import namedtuple, defaultdict import subprocess as subp import os +import shutil -from elftools.elf.elffile import ELFFile -from elftools.elf.sections import SymbolTableSection +if not shutil.which('nm') or not shutil.which('c++filt'): + raise ImportError('Cannot find binaries "nm" and "c++filt"') SymbolTuple = namedtuple('SymbolTuple', 'symbol, demangled, fname, lineno') @@ -106,194 +108,96 @@ lineno: line number of definition within fname. ''' -def extract_symbols(objfile): +def extract_symbols(objfile_or_list): ''' Extracts symbols for the given object file. - @param objfile: (str) path to object file + @param objfile_or_list: (str or list(str)) path to object file(s) @return two lists of SymbolTuple objects (funcsyms, remaining). The first is the list of exported functions that are strong symbols and have a filename and line number where they are defined. 
The second is all remaining symbols that are strong, exported, and defined. ''' - with open(objfile, 'rb') as fin: - elffile = ELFFile(fin) - - symtabs = [x for x in elffile.iter_sections() - if isinstance(x, SymbolTableSection)] - if len(symtabs) == 0: - raise RuntimeError('Object file {} does not have a symbol table' - .format(objfile)) - - # get globally exported defined symbols - syms = [sym for symtab in symtabs - for sym in symtab.iter_symbols() - if _is_symbol(sym) - and _is_extern(sym) - and _is_strong(sym) - and _is_defined(sym)] - - # split symbols into functions and non-functions - fsyms = [sym for sym in syms if _is_func(sym)] # functions - rsyms = list(set(syms).difference(fsyms)) # remaining - - # find filename and line numbers for each relevant func symbol - locs = _locate_symbols(elffile, fsyms) - - # demangle all symbols - fdemangled = _demangle([sym.name for sym in fsyms]) - rdemangled = _demangle([sym.name for sym in rsyms]) - - funcsym_tuples = [SymbolTuple(fsyms[i].name, fdemangled[i], - locs[i][0], locs[i][1]) - for i in range(len(fsyms))] - remaining_tuples = [SymbolTuple(rsyms[i].name, rdemangled[i], - None, None) - for i in range(len(rsyms))] - - return funcsym_tuples, remaining_tuples - -def _symbols(symtab): - 'Returns all symbols from the given symbol table' - return [sym for sym in symtab.iter_symbols() if _is_symbol(sym)] - -def _is_symbol(sym): - 'Returns True if elf.sections.Symbol object is a symbol' - return sym.name != '' and sym['st_info']['type'] != 'STT_FILE' - -def _is_extern(sym): - 'Returns True if elf.sections.Symbol is an extern symbol' - return sym['st_info']['bind'] != 'STB_LOCAL' - -def _is_weak(sym): - 'Returns True if elf.sections.Symbol is a weak symbol' - return sym['st_info']['bind'] == 'STB_WEAK' - -def _is_strong(sym): - 'Returns True if elf.sections.Symbol is a strong symbol' - return sym['st_info']['bind'] == 'STB_GLOBAL' - -def _is_defined(sym): - 'Returns True if elf.sections.Symbol is defined' - return 
sym['st_shndx'] != 'SHN_UNDEF' + funcsym_tuples = [] + remaining_tuples = [] + nm_args = [ + 'nm', + '--print-file-name', + '--extern-only', + '--defined-only', + ] + if isinstance(objfile_or_list, str): + nm_args.append(objfile_or_list) + else: + nm_args.extend(objfile_or_list) + symbol_strings = subp.check_output(nm_args).decode('utf-8').splitlines() + + obj_symbols = defaultdict(list) + symbols = [] + for symbol_string in symbol_strings: + loc, stype, symbol = symbol_string.split(maxsplit=2) + objfile, offset = loc.split(':') + symbols.append(symbol) + obj_symbols[objfile].append((offset, stype, symbol)) + + demangle_map = dict(zip(symbols, _demangle(symbols))) + + fileinfo_map = {} + linenumber_map = {} + for obj, symlist in obj_symbols.items(): + to_check = [] + for offset, stype, symbol in symlist: + if symbol in fileinfo_map and fileinfo_map[symbol]: + continue + elif stype.lower() != 't': + fileinfo_map[symbol] = (None, None) + else: + to_check.append((offset, symbol)) + fileinfo_map.update(_fnames_and_line_numbers(obj, to_check)) + + for symbol in symbols: + fnam, line = fileinfo_map[symbol] + symbol_tuple = SymbolTuple(symbol, demangle_map[symbol], fnam, line) + if fnam: + funcsym_tuples.append(symbol_tuple) + else: + remaining_tuples.append(symbol_tuple) -def _is_func(sym): - 'Returns True if elf.sections.Symbol is a function' - return sym['st_info']['type'] == 'STT_FUNC' + return funcsym_tuples, remaining_tuples def _demangle(symbol_list): 'Demangles each C++ name in the given list' + if not symbol_list: + return [] proc = subp.Popen(['c++filt'], stdin=subp.PIPE, stdout=subp.PIPE) out, _ = proc.communicate('\n'.join(symbol_list).encode()) demangled = out.decode('utf8').splitlines() assert len(demangled) == len(symbol_list) return demangled -def _locate_symbols(elffile, symbols): +def _fnames_and_line_numbers(objfile, offset_symbol_tuples): ''' - Locates the filename and line number of each symbol in the elf file. 
- - @param elffile: (elf.elffile.ELFFile) The top-level elf file - @param symbols: (list(elf.sections.Symbol)) symbols to locate - - @return list(tuple(filename, lineno)) in the order of the given symbols - - If the file does not have DWARF info or a symbol is not found, an exception - is raised - - Test that even without a proper elffile, if there are no symbols to match, - then no error occurs and you can be on your merry way. - >>> _locate_symbols(object(), []) - [] + Given a list of tuples of (offset, symbol), return a single dictionary: a + mapping from symbol name to a tuple of (filename, line number). If the + filename and/or line number could not be determined, then both will be set + to None. ''' - if len(symbols) == 0: - return [] - - if not elffile.has_dwarf_info(): - raise RuntimeError('Elf file has no DWARF info') - - dwarfinfo = elffile.get_dwarf_info() - fltable = _gen_file_line_table(dwarfinfo) - - locations = [] - for sym in symbols: - for fname, lineno, start, end in fltable: - if start <= sym.entry['st_value'] < end: - locations.append((fname.decode('utf8'), lineno)) - break - else: - locations.append((None, None)) - - return locations - -def _gen_file_line_table(dwarfinfo): - ''' - Generates and returns a list of (filename, lineno, startaddr, endaddr). - - Tests that an empty dwarfinfo object will result in an empty return list - >>> class FakeDwarf: - ... def __init__(self): - ... pass - ... def iter_CUs(self): - ... return [] - >>> _gen_file_line_table(FakeDwarf()) - [] - ''' - # generate the table - table = [] - for unit in dwarfinfo.iter_CUs(): - compile_dir = _get_compile_dir(unit) - lineprog = dwarfinfo.line_program_for_CU(unit) - prevstate = None - for entry in lineprog.get_entries(): - # We're interested in those entries where a new state is assigned - if entry.state is None or entry.state.end_sequence: - continue - # Looking for a range of addresses in two consecutive states that - # contain a required address.
- if prevstate is not None: - filename = lineprog['file_entry'][prevstate.file - 1].name - dirno = lineprog['file_entry'][prevstate.file - 1].dir_index - filepath = os.path.join( - lineprog['include_directory'][dirno - 1], filename) - if compile_dir and not os.path.isabs(filepath): # make absolute - filepath = os.path.join(compile_dir, filepath) - line = prevstate.line - fromaddr = prevstate.address - toaddr = max(fromaddr, entry.state.address) - table.append((filepath, line, fromaddr, toaddr)) - prevstate = entry.state - - # If there are no functions, then return an empty list - if len(table) == 0: - return [] - - # consolidate the table - consolidated = [] - prev = table[0] - for entry in table[1:]: - if prev[1] == entry[1] and prev[3] == entry[2]: - prev = (prev[0], prev[1], prev[2], entry[3]) - else: - consolidated.append(prev) - prev = entry - consolidated.append(prev) - - return consolidated - -def _get_compile_dir(compile_unit): - ''' - Returns the directory where the compile unit was compiled from. - - @param compile_unit: A CU from a dwarfinfo.iterCUs() - - @return (bytes) the DW_AT_comp_dir attribute for the given compile unit - or None if this attribute is missing - ''' - die = next(compile_unit.iter_DIEs()) # first DIE - key = 'DW_AT_comp_dir' - if key in die.attributes: - return die.attributes['DW_AT_comp_dir'].value - else: - return None + if not offset_symbol_tuples: + return {} + proc = subp.Popen(['addr2line', '-e', objfile], stdin=subp.PIPE, + stdout=subp.PIPE) + out, _ = proc.communicate('\n'.join(x[0] for x in offset_symbol_tuples) + .encode()) + info = out.decode('utf8').splitlines() + assert len(info) == len(offset_symbol_tuples), \ + 'len(info) = {}, len(offset_symbol_tuples) = {}'\ + .format(len(info), len(offset_symbol_tuples)) + mapping = {} + for line, symbol in zip(info, (x[1] for x in offset_symbol_tuples)): + filename, linenumber = line.strip().split(':') + if filename == '??' 
or linenumber == '0': + filename = None + linenumber = None + mapping[symbol] = (filename, linenumber) + return mapping diff --git a/scripts/flitcli/flitelf_nm.py b/scripts/flitcli/flitelf_nm.py deleted file mode 100644 index 5463bd3d..00000000 --- a/scripts/flitcli/flitelf_nm.py +++ /dev/null @@ -1,204 +0,0 @@ -# -- LICENSE BEGIN -- -# -# Copyright (c) 2015-2020, Lawrence Livermore National Security, LLC. -# -# Produced at the Lawrence Livermore National Laboratory -# -# Written by -# Michael Bentley (mikebentley15@gmail.com), -# Geof Sawaya (fredricflinstone@gmail.com), -# and Ian Briggs (ian.briggs@utah.edu) -# under the direction of -# Ganesh Gopalakrishnan -# and Dong H. Ahn. -# -# LLNL-CODE-743137 -# -# All rights reserved. -# -# This file is part of FLiT. For details, see -# https://pruners.github.io/flit -# Please also read -# https://github.com/PRUNERS/FLiT/blob/master/LICENSE -# -# Redistribution and use in source and binary forms, with or -# without modification, are permitted provided that the following -# conditions are met: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the disclaimer below. -# -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the disclaimer -# (as noted below) in the documentation and/or other materials -# provided with the distribution. -# -# - Neither the name of the LLNS/LLNL nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL -# SECURITY, LLC, THE U.S. 
DEPARTMENT OF ENERGY OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -# THE POSSIBILITY OF SUCH DAMAGE. -# -# Additional BSD Notice -# -# 1. This notice is required to be provided under our contract -# with the U.S. Department of Energy (DOE). This work was -# produced at Lawrence Livermore National Laboratory under -# Contract No. DE-AC52-07NA27344 with the DOE. -# -# 2. Neither the United States Government nor Lawrence Livermore -# National Security, LLC nor any of their employees, makes any -# warranty, express or implied, or assumes any liability or -# responsibility for the accuracy, completeness, or usefulness of -# any information, apparatus, product, or process disclosed, or -# represents that its use would not infringe privately-owned -# rights. -# -# 3. Also, reference herein to any specific commercial products, -# process, or services by trade name, trademark, manufacturer or -# otherwise does not necessarily constitute or imply its -# endorsement, recommendation, or favoring by the United States -# Government or Lawrence Livermore National Security, LLC. The -# views and opinions of authors expressed herein do not -# necessarily state or reflect those of the United States -# Government or Lawrence Livermore National Security, LLC, and -# shall not be used for advertising or product endorsement -# purposes. -# -# -- LICENSE END -- - -''' -Utility functions for dealing with ELF binary files. This file uses -alternative methods to do this functionality that does not require the -pyelftools package. 
But this file gives the same public interface as -pyelftools so that it can be used as a replacement. - -Instead, this package uses binutils through subprocesses. The programs used -are "nm" and "c++filt" to perform the same functionality. -''' - -from collections import namedtuple, defaultdict -import subprocess as subp -import os -import shutil - -if not shutil.which('nm') or not shutil.which('c++filt'): - raise ImportError('Cannot find binaries "nm" and "c++filt"') - -SymbolTuple = namedtuple('SymbolTuple', - 'symbol, demangled, fname, lineno') -SymbolTuple.__doc__ = ''' -Tuple containing information about the symbols in a file. Has the following -attributes: - symbol: mangled symbol in the compiled version - demangled: demangled version of symbol - fname: filename where the symbol is defined. - lineno: line number of definition within fname. -''' - -def extract_symbols(objfile_or_list): - ''' - Extracts symbols for the given object file. - - @param objfile_or_list: (str or list(str)) path to object file(s) - - @return two lists of SymbolTuple objects (funcsyms, remaining). - The first is the list of exported functions that are strong symbols and - have a filename and line number where they are defined. The second is - all remaining symbols that are strong, exported, and defined. 
- ''' - funcsym_tuples = [] - remaining_tuples = [] - nm_args = [ - 'nm', - '--print-file-name', - '--extern-only', - '--defined-only', - ] - if isinstance(objfile_or_list, str): - nm_args.append(objfile_or_list) - else: - nm_args.extend(objfile_or_list) - symbol_strings = subp.check_output(nm_args).decode('utf-8').splitlines() - - obj_symbols = defaultdict(list) - symbols = [] - for symbol_string in symbol_strings: - loc, stype, symbol = symbol_string.split(maxsplit=2) - objfile, offset = loc.split(':') - symbols.append(symbol) - obj_symbols[objfile].append((offset, stype, symbol)) - - demangle_map = dict(zip(symbols, _demangle(symbols))) - - fileinfo_map = {} - linenumber_map = {} - for obj, symlist in obj_symbols.items(): - to_check = [] - for offset, stype, symbol in symlist: - if symbol in fileinfo_map and fileinfo_map[symbol]: - continue - elif stype.lower() != 't': - fileinfo_map[symbol] = (None, None) - else: - to_check.append((offset, symbol)) - fileinfo_map.update(_fnames_and_line_numbers(obj, to_check)) - - for symbol in symbols: - fnam, line = fileinfo_map[symbol] - symbol_tuple = SymbolTuple(symbol, demangle_map[symbol], fnam, line) - if fnam: - funcsym_tuples.append(symbol_tuple) - else: - remaining_tuples.append(symbol_tuple) - - return funcsym_tuples, remaining_tuples - -def _demangle(symbol_list): - 'Demangles each C++ name in the given list' - if not symbol_list: - return [] - proc = subp.Popen(['c++filt'], stdin=subp.PIPE, stdout=subp.PIPE) - out, _ = proc.communicate('\n'.join(symbol_list).encode()) - demangled = out.decode('utf8').splitlines() - assert len(demangled) == len(symbol_list) - return demangled - -def _fnames_and_line_numbers(objfile, offset_symbol_tuples): - ''' - Given a list of tuples of (offset, symbol), return a single dictionaries, a - mapping from symbol name to a tuple of (filename, line number). If the - filename and/or line number could not be determined, then both will be set - to None. 
- ''' - if not offset_symbol_tuples: - return {} - proc = subp.Popen(['addr2line', '-e', objfile], stdin=subp.PIPE, - stdout=subp.PIPE) - out, _ = proc.communicate('\n'.join(x[0] for x in offset_symbol_tuples) - .encode()) - info = out.decode('utf8').splitlines() - assert len(info) == len(offset_symbol_tuples), \ - 'len(info) = {}, len(offset_symbol_tuples) = {}'\ - .format(len(info), len(offset_symbol_tuples)) - mapping = {} - for line, symbol in zip(info, (x[1] for x in offset_symbol_tuples)): - filename, linenumber = line.strip().split(':') - if filename == '??' or linenumber == '0': - filename = None - linenumber = None - mapping[symbol] = (filename, linenumber) - return mapping diff --git a/tests/flit_install/tst_install_runthrough.py b/tests/flit_install/tst_install_runthrough.py index 1e96098e..0eecb97d 100644 --- a/tests/flit_install/tst_install_runthrough.py +++ b/tests/flit_install/tst_install_runthrough.py @@ -302,7 +302,6 @@ 'share/flit/scripts/flit_update.py', 'share/flit/scripts/flitconfig.py', 'share/flit/scripts/flitelf.py', - 'share/flit/scripts/flitelf_nm.py', 'share/flit/scripts/flitutil.py', 'share/flit/src', 'share/flit/src/ALL-FLIT.cpp', From 8b80cbbfc3190cd39a95ef9a6dfa5b262ea8cce5 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Sat, 3 Oct 2020 17:02:46 -0600 Subject: [PATCH 20/22] travis: accidentally removed toml installation --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 394eb218..2d7b222b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -120,6 +120,7 @@ matrix: before_install: - pip3 install --user setuptools + - pip3 install --user toml script: dpkg --list | grep binutil && make -j4 && make -j4 -C tests && make check From 9591f6027b242e731a7ac88ffe60c55e78d31ca5 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Sat, 3 Oct 2020 22:55:46 -0600 Subject: [PATCH 21/22] flit_disguise: remove --fields and add --jobs --- scripts/flitcli/flit_disguise.py | 33 +++++++++++++++++--------------- 
1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/scripts/flitcli/flit_disguise.py b/scripts/flitcli/flit_disguise.py index 51faf08b..85bec40c 100644 --- a/scripts/flitcli/flit_disguise.py +++ b/scripts/flitcli/flit_disguise.py @@ -88,6 +88,7 @@ import argparse import csv import glob +import multiprocessing as mp import os import re import subprocess as subp @@ -148,20 +149,15 @@ def populate_parser(parser=None): analysis done by someone with the anonymized file(s). ''') - parser.add_argument('--fields', default='file,function,test', + parser.add_argument('-j', '--jobs', default=None, help=''' - A comma-separated list of fields you want to - disguise. This will not effect the generated - disguise map, all fields will be present there. It - will just impact the disguising and the undo - operations. - - Available fields are - (1) "file": source file name and file path - (including compiled object file(s)), - (2) "function": function name, both mangled and - demangled, and - (3) "test": name of the test. + When generating the disguise map, we may need to + compile the gtrun executable using the + autogenerated flit Makefile. This flag specifies + the number of jobs to give to GNU make. The + default behavior is to defer to the MAKEFLAGS + environment variable. If that variable is not set, + then we will use the number of processors. 
''') parser.add_argument('file', nargs='?', help=''' @@ -176,11 +172,18 @@ def populate_parser(parser=None): ''') return parser -def generate_disguise_map(outfile='disguise.csv'): +def generate_disguise_map(outfile='disguise.csv', jobs=None): 'Generate the disguise map, often called from the Makefile' + if not jobs and 'MAKEFLAGS' not in os.environ: + jobs = mp.cpu_count() + # make sure gtrun is compiled - subp.check_call(['make', 'gtrun']) + make_args = ['make', 'gtrun'] + if jobs: + make_args.append('-j{}'.format(jobs)) + subp.check_call(make_args) + makevars = util.extract_make_vars() # get list of source files From a3f9a1e443db7566f7b88a44ea707e6111240294 Mon Sep 17 00:00:00 2001 From: Michael Bentley Date: Sat, 3 Oct 2020 23:15:48 -0600 Subject: [PATCH 22/22] Update bash_completion from changed cli of flit disguise --- scripts/bash-completion/flit | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/scripts/bash-completion/flit b/scripts/bash-completion/flit index 19d6abff..c00fef14 100644 --- a/scripts/bash-completion/flit +++ b/scripts/bash-completion/flit @@ -105,7 +105,7 @@ _flit_disguise() -o --output -m --disguise-map -u --undo - --fields" + -j --jobs" # file field case "${prev}" in @@ -115,26 +115,8 @@ _flit_disguise() return 0 ;; - --fields) - # three possible fields: file, function, test - # TODO: separate on comma and only complete since the last comma - local possibilities=" - file - file,function - file,function,test - file,test - file,test,function - function - function,file - function,file,test - function,test - function,test,file - test - test,file - test,file,function - test,function - test,function,file" - COMPREPLY=( $(compgen -W "${possibilities}" -- "${cur}") ) + -j|--jobs) + # do no completion -- numbers return 0 ;;