diff --git a/codebasin/__init__.py b/codebasin/__init__.py
index 002f03f..9530e88 100644
--- a/codebasin/__init__.py
+++ b/codebasin/__init__.py
@@ -1,7 +1,9 @@
 # Copyright (C) 2019-2024 Intel Corporation
 # SPDX-License-Identifier: BSD-3-Clause
+import shlex
 import warnings
 
+import codebasin.source
 import codebasin.walkers
 
 warnings.warn(
@@ -11,3 +13,119 @@
     + "a future release of Code Base Investigator.",
     DeprecationWarning,
 )
+
+
+class CompileCommand:
+    """
+    A single compile command from a compilation database.
+
+    Attributes
+    ----------
+    filename: string
+        The name of the source file compiled by this command.
+
+    directory: string, optional
+        The working directory for this command.
+
+    arguments: list[string], optional
+        The `argv` for this command, including the executable as `argv[0]`.
+
+    output: string, optional
+        The name of the file produced by this command, or None if not
+        specified.
+    """
+
+    def __init__(
+        self,
+        filename,
+        directory=None,
+        arguments=None,
+        command=None,
+        output=None,
+    ):
+        """
+        Parameters
+        ----------
+        filename, directory, arguments, output
+            Initial values for the attributes of the same name.
+
+        command: string, optional
+            The command as a single string. It is split with `shlex.split`
+            to produce `arguments` when `arguments` is None.
+
+        Raises
+        ------
+        ValueError
+            If both arguments and command are None.
+        """
+        self._filename = filename
+        self._directory = directory
+        if arguments is None and command is None:
+            raise ValueError("CompileCommand requires arguments or command.")
+        self._arguments = arguments
+        self._command = command
+        self._output = output
+
+    @property
+    def directory(self):
+        return self._directory
+
+    @property
+    def filename(self):
+        return self._filename
+
+    @property
+    def arguments(self):
+        if self._arguments is None:
+            return shlex.split(self._command)
+        else:
+            return self._arguments
+
+    @property
+    def output(self):
+        return self._output
+
+    def __str__(self):
+        if self._command is None:
+            return " ".join(self._arguments)
+        else:
+            return self._command
+
+    def is_supported(self):
+        """
+        Returns
+        -------
+        bool
+            True if the command can be emulated and False otherwise.
+            Commands that are not supported will not impact analysis.
+        """
+        # Commands must be non-empty in order to do something.
+        # Commands must operate on source files.
+        return len(self.arguments) > 0 and codebasin.source.is_source_file(
+            self.filename,
+        )
+
+    @classmethod
+    def from_json(cls, instance: dict):
+        """
+        Parameters
+        ----------
+        instance: dict
+            A JSON object representing a single compile command.
+
+        Returns
+        -------
+        CompileCommand
+            A CompileCommand corresponding to the JSON object.
+        """
+        directory = instance.get("directory", None)
+        arguments = instance.get("arguments", None)
+        command = instance.get("command", None)
+        output = instance.get("output", None)
+        return cls(
+            instance["file"],
+            directory=directory,
+            arguments=arguments,
+            command=command,
+            output=output,
+        )
diff --git a/codebasin/config.py b/codebasin/config.py
index 26ba7fa..53182dd 100644
--- a/codebasin/config.py
+++ b/codebasin/config.py
@@ -11,12 +11,11 @@
 import logging
 import os
 import re
-import shlex
 import warnings
 
 import yaml
 
-from codebasin import util
+from codebasin import CompileCommand, util
 
 log = logging.getLogger("codebasin")
 
@@ -418,11 +417,10 @@ def load_database(dbpath, rootdir):
 
     configuration = []
     for e in db:
-        # Database may not have tokenized arguments
-        if "command" in e:
-            argv = shlex.split(e["command"])
-        elif "arguments" in e:
-            argv = e["arguments"]
+        command = CompileCommand.from_json(e)
+        if not command.is_supported():
+            continue
+        argv = command.arguments
 
         # Extract defines, include paths and include files
         # from command-line arguments
@@ -444,19 +442,18 @@ def load_database(dbpath, rootdir):
         # - relative to a directory
         # - as an absolute path
         filedir = rootdir
-        if "directory" in e:
-            if os.path.isabs(e["directory"]):
-                filedir = e["directory"]
+        if command.directory is not None:
+            if os.path.isabs(command.directory):
+                filedir = command.directory
             else:
                 filedir = os.path.realpath(
-                    rootdir,
-                    os.path.join(e["directory"]),
+                    os.path.join(rootdir, command.directory),
                 )
 
-        if os.path.isabs(e["file"]):
-            path = os.path.realpath(e["file"])
+        if os.path.isabs(command.filename):
+            path = os.path.realpath(command.filename)
         else:
-            path = os.path.realpath(os.path.join(filedir, e["file"]))
+            path = os.path.realpath(os.path.join(filedir, command.filename))
 
         # Compilation database may contain files that don't
         # exist without running make
diff --git a/codebasin/source.py b/codebasin/source.py
new file mode 100644
index 0000000..53dae02
--- /dev/null
+++ b/codebasin/source.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2019-2024 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
+
+import os
+from pathlib import Path
+from typing import Union
+
+
+def is_source_file(filename: Union[str, os.PathLike]) -> bool:
+    """
+    Parameters
+    ----------
+    filename: Union[str, os.PathLike]
+        The filename of a potential source file.
+
+    Returns
+    -------
+    bool
+        True if the file ends in a recognized extension and False otherwise.
+        Only files that can be parsed correctly have recognized extensions.
+
+    Raises
+    ------
+    TypeError
+        If filename is not a string or path-like object.
+    """
+    if not isinstance(filename, (str, os.PathLike)):
+        raise TypeError("filename must be a string or Path")
+
+    extension = Path(filename).suffix
+    supported_extensions = [
+        ".f90",
+        ".F90",
+        ".f",
+        ".ftn",
+        ".fpp",
+        ".F",
+        ".FOR",
+        ".FTN",
+        ".FPP",
+        ".c",
+        ".h",
+        ".c++",
+        ".cxx",
+        ".cpp",
+        ".cc",
+        ".hpp",
+        ".hxx",
+        ".h++",
+        ".hh",
+        ".inc",
+        ".inl",
+        ".tcc",
+        ".icc",
+        ".ipp",
+        ".cu",
+        ".cuh",
+        ".cl",
+        ".s",
+        ".S",
+        ".asm",
+    ]
+    return extension in supported_extensions
diff --git a/tests/compile-command/__init__.py b/tests/compile-command/__init__.py
new file mode 100644
index 0000000..94adb81
--- /dev/null
+++ b/tests/compile-command/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2019-2024 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/tests/compile-command/test_compile_command.py b/tests/compile-command/test_compile_command.py
new file mode 100644
index 0000000..06ad714
--- /dev/null
+++ b/tests/compile-command/test_compile_command.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2019-2024 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
+
+import unittest
+
+from codebasin import CompileCommand
+
+
+class TestCompileCommand(unittest.TestCase):
+    """
+    Test CompileCommand class.
+    """
+
+    def test_commands_and_arguments(self):
+        """Check commands and arguments are not both None"""
+
+        with self.assertRaises(ValueError):
+            CompileCommand("file.cpp", command=None, arguments=None)
+
+        with self.assertRaises(ValueError):
+            instance = {
+                "file": "file.cpp",
+            }
+            CompileCommand.from_json(instance)
+
+    def test_command_to_arguments(self):
+        """Check commands convert to arguments"""
+        command = CompileCommand("file.cpp", command="c++ file.cpp")
+        self.assertEqual(command.arguments, ["c++", "file.cpp"])
+
+        instance = {
+            "file": "file.cpp",
+            "command": "c++ file.cpp",
+        }
+        command = CompileCommand.from_json(instance)
+        self.assertEqual(command.arguments, ["c++", "file.cpp"])
+
+    def test_arguments_to_command(self):
+        """Check arguments convert to command"""
+        command = CompileCommand("file.cpp", arguments=["c++", "file.cpp"])
+        self.assertEqual(str(command), "c++ file.cpp")
+
+        instance = {
+            "file": "file.cpp",
+            "arguments": [
+                "c++",
+                "file.cpp",
+            ],
+        }
+        command = CompileCommand.from_json(instance)
+        self.assertEqual(str(command), "c++ file.cpp")
+
+    def test_empty_command(self):
+        """Check empty commands are not supported"""
+        command = CompileCommand("file.cpp", command="")
+        self.assertFalse(command.is_supported())
+
+    def test_link_command(self):
+        """Check link commands are not supported"""
+        command = CompileCommand("file.o", command="c++ -o a.out file.o")
+        self.assertFalse(command.is_supported())
+
+    def test_valid_command(self):
+        """Check valid commands are supported"""
+        command = CompileCommand("file.cpp", command="c++ file.cpp")
+        self.assertTrue(command.is_supported())
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/source/__init__.py b/tests/source/__init__.py
new file mode 100644
index 0000000..94adb81
--- /dev/null
+++ b/tests/source/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (C) 2019-2024 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
diff --git a/tests/source/test_source.py b/tests/source/test_source.py
new file mode 100644
index 0000000..3660423
--- /dev/null
+++ b/tests/source/test_source.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2019-2024 Intel Corporation
+# SPDX-License-Identifier: BSD-3-Clause
+
+import unittest
+from pathlib import Path, PurePath
+
+import codebasin.source as source
+
+
+class TestSource(unittest.TestCase):
+    """
+    Test functionality in the source module.
+    """
+
+    def test_is_source_file_string(self):
+        """Check source file identification for string filenames"""
+        self.assertTrue(source.is_source_file("file.cpp"))
+        self.assertTrue(source.is_source_file("/path/to/file.cpp"))
+        self.assertFalse(source.is_source_file("file.o"))
+        self.assertFalse(source.is_source_file("/path/to/file.o"))
+
+    def test_is_source_file_path(self):
+        """Check source file identification for Path filenames"""
+        self.assertTrue(source.is_source_file(Path("file.cpp")))
+        self.assertTrue(source.is_source_file(Path("/path/to/file.cpp")))
+        self.assertFalse(source.is_source_file(Path("file.o")))
+        self.assertFalse(source.is_source_file(Path("/path/to/file.o")))
+
+    def test_is_source_file_pathlike(self):
+        """Check source file identification for os.PathLike filenames"""
+        self.assertTrue(source.is_source_file(PurePath("file.cpp")))
+        self.assertFalse(source.is_source_file(PurePath("file.o")))
+
+    def test_is_source_types(self):
+        """Check type validation for is_source"""
+        with self.assertRaises(TypeError):
+            source.is_source_file(1)
+
+
+if __name__ == "__main__":
+    unittest.main()